sdr-replication 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/libdir.rb +3 -0
- data/lib/replication/archive_catalog.rb +110 -0
- data/lib/replication/bagit_bag.rb +337 -0
- data/lib/replication/command_consumer.rb +55 -0
- data/lib/replication/command_producer.rb +105 -0
- data/lib/replication/dpn_check_rep.rb +83 -0
- data/lib/replication/file_fixity.rb +98 -0
- data/lib/replication/fixity.rb +155 -0
- data/lib/replication/operating_system.rb +33 -0
- data/lib/replication/replica.rb +62 -0
- data/lib/replication/sdr_object_version.rb +63 -0
- data/lib/replication/tarfile.rb +160 -0
- data/lib/sdr_replication.rb +26 -0
- metadata +198 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e99a0814e4383ec6287dec7df41825d786e65919
|
4
|
+
data.tar.gz: 4890c2758dd820f22ce8aa9015621e1b33dec9ae
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: deb3400e53fcbdf16cf8263ffe1deed70be83889633b4fecfa0de58e41a076e7f502de61d22f079b09ff6b81ddb29586bb6e898782f22e45010ecfd46d93828d
|
7
|
+
data.tar.gz: a0533d23addcc264e7aeca97f985bdad1d70e8081230e09ab5337a03314b058374541b1587f47a5f89dd2f558115677190d49b320b892a77e7031bb15ac9c3ff
|
data/lib/libdir.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rest-client'
|
3
|
+
|
4
|
+
module Replication
|
5
|
+
|
6
|
+
# A wrapper class based on {RestClient} used to interface with the Archive Catalog service.
|
7
|
+
# <br>
|
8
|
+
# <br>
|
9
|
+
# The default RestClient behavior is:
|
10
|
+
# * for result codes between 200 and 207 a RestClient::Response will be returned
|
11
|
+
# * for result code 301, 302 or 307 the redirection will be followed if the request is a get or a head
|
12
|
+
# * for result code 303 the redirection will be followed and the request transformed into a get
|
13
|
+
# * for other cases a RestClient::Exception holding the Response will be raised
|
14
|
+
#
|
15
|
+
# But we are using a technique that forces RestClient to always provide the response
|
16
|
+
# <br>
|
17
|
+
# <br>
|
18
|
+
# RestClient::Response has these instance methods (some inherited from AbstractResponse):
|
19
|
+
# * args
|
20
|
+
# * body
|
21
|
+
# * code (e.g. 204)
|
22
|
+
# * description (e.g. "204 No Content | 0 bytes")
|
23
|
+
# * headers
|
24
|
+
# * net_http_res
|
25
|
+
#
|
26
|
+
# @see https://github.com/rest-client/rest-client
|
27
|
+
# @see http://rubydoc.info/gems/rest-client/1.6.7/frames
|
28
|
+
class ArchiveCatalog

  # Class instance variables holding the service configuration (exposed through
  # the accessors declared in the singleton class below).
  @root_uri = 'http://localhost:3000'
  @timeout = 120

  # @see https://www.google.com/search?q="class+<<+self"+"attr_accessor"
  class << self

    # @return [String] The base or home URL of the Archive Catalog web service
    attr_accessor :root_uri

    # @return [Integer] seconds to wait for a response or to open a connection. Value nil disables the timeout.
    attr_accessor :timeout

    # The base RestClient resource to be used for requests
    # @return [RestClient::Resource] a new resource built from the current root_uri and timeout settings
    def root_resource
      RestClient::Resource.new(@root_uri, {:open_timeout => @timeout, :timeout => @timeout})
    end

    # Get the item record from the specified table for the specified primary key.
    # @param [String] table name of the database table
    # @param [String] id primary key for the item in the database table
    # @return [Hash] the row (in key,value hash) from the specified table for the specified identifier.
    #   Response body contains the item data in JSON format, which is converted to a hash.
    # @raise [RuntimeError] (message is the response description) if the response code is not 200
    # @see http://tools.ietf.org/html/rfc2616#page-53
    def get_item(table, id)
      headers = {:accept => 'application/json'}
      # Passing a block makes RestClient hand back the response instead of raising RestClient::Exception
      response = root_resource["#{table}/#{id}.json"].get(headers) { |resp, _request, _result| resp }
      require_response_code(response, '200')
      JSON.parse(response.body)
    end

    # Retrieve an existing database record or add a new one using the data provided.
    # @param [String] table name of the database table
    # @param [Hash] hash the item data to be added to the database table
    # @return [Hash] result containing the item data as if a GET were performed.
    #   The HTTP response code for success is 201 (Created).
    # @raise [RuntimeError] (message is the response description) if the response code is not 201
    # @see http://en.wikipedia.org/wiki/POST_(HTTP)
    # @see http://tools.ietf.org/html/rfc2616#page-54
    def find_or_create_item(table, hash)
      payload = hash.to_json
      headers = {:content_type => :json, :accept => :json}
      # Passing a block makes RestClient hand back the response instead of raising RestClient::Exception
      response = root_resource["#{table}.json"].post(payload, headers) { |resp, _request, _result| resp }
      require_response_code(response, '201')
      JSON.parse(response.body)
    end

    # Update the database columns for the specified item using the hash data.
    # @param [String] table name of the database table
    # @param [String] id primary key for the item in the database table
    # @param [Hash] hash the item data to be updated in the database table
    # @return (Boolean) true if the HTTP response code is 204, per specification for PATCH or PUT request types.
    #   Response body is empty, per same specification.
    # @raise [RuntimeError] (message is the response description) if the response code is not 204
    # @see https://tools.ietf.org/html/rfc5789
    # @see http://stackoverflow.com/questions/797834/should-a-restful-put-operation-return-something/827045#827045
    def update_item(table, id, hash)
      payload = hash.to_json
      headers = {:content_type => :json}
      # Passing a block makes RestClient hand back the response instead of raising RestClient::Exception
      response = root_resource["#{table}/#{id}.json"].patch(payload, headers) { |resp, _request, _result| resp }
      require_response_code(response, '204')
      true
    end

    private

    # Shared success check for all request methods.
    # @param [#code,#description] response the response returned by the request
    # @param [String] expected_code the HTTP status code (as a string) that signals success
    # @return [Object] the response, when its code matches
    # @raise [RuntimeError] with the response description as message when the code differs
    def require_response_code(response, expected_code)
      raise response.description unless response.code.to_s == expected_code
      response
    end

  end

end
|
109
|
+
|
110
|
+
end
|
@@ -0,0 +1,337 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../libdir')
|
2
|
+
require 'sdr_replication'
|
3
|
+
|
4
|
+
module Replication
|
5
|
+
|
6
|
+
# A BagIt bag contains a structured copy of a digital object for storage, transfer, or replication
|
7
|
+
# @see https://tools.ietf.org/html/draft-kunze-bagit-10
|
8
|
+
# This class can be used to create, parse, or validate a bag instance
|
9
|
+
#
|
10
|
+
# @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
|
11
|
+
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
12
|
+
class BagitBag

  # Unit labels used by {#bag_size_human}, indexed by the number of divisions by 1024
  SIZE_UNITS = %w[B KB MB GB TB].freeze

  # @param [Pathname,String] pathname The location of the bag home directory
  # @return [BagitBag] Initialize a new bag, create home and payload folders, write bagit.txt file
  def self.create_bag(pathname)
    bag = new
    bag.bag_pathname = pathname
    bag.payload_pathname.mkpath
    bag.write_bagit_txt
    bag
  end

  # @param [Pathname,String] pathname The location of the bag home directory
  # @return [BagitBag] Open an existing bag, after checking that the home directory and its bagit.txt file exist
  # @raise [RuntimeError] if the bag home directory or its bagit.txt file does not exist
  def self.open_bag(pathname)
    bag = new
    bag.bag_pathname = pathname
    raise "No bag found at #{bag.bag_pathname}" unless bag.bag_pathname.exist?
    bagit_txt = bag.bag_pathname.join("bagit.txt")
    raise "No bagit.txt file found at #{bagit_txt}" unless bagit_txt.exist?
    bag
  end

  # @return [Pathname] The location of the bag home directory
  attr_reader :bag_pathname

  # @param [Pathname,String] pathname The location of the bag home directory
  # @return [Void] Set the location of the bag home directory
  def bag_pathname=(pathname)
    @bag_pathname = Pathname(pathname)
  end

  # @return [Pathname] The location of the bag data directory
  def payload_pathname
    bag_pathname.join('data')
  end

  # @return [Pathname] Generate the bagit.txt tag file
  def write_bagit_txt
    bagit_txt = bag_pathname.join("bagit.txt")
    bagit_txt.open('w') do |f|
      f.puts "Tag-File-Character-Encoding: UTF-8"
      f.puts "BagIt-Version: 0.97"
    end
    bagit_txt
  end

  # @return [Hash<String,String>] A hash containing the properties documented in the bagit.txt tagfile
  def read_bagit_txt
    read_tag_file(bag_pathname.join("bagit.txt"))
  end

  # @return [Array<Symbol>] The list of checksum types to be used when generating fixity data
  def bag_checksum_types
    @bag_checksum_types ||= Fixity.default_checksum_types
  end

  # @param [Object] types The list of checksum types to be used when generating fixity data
  # @return [Void] Set the list of checksum types to be used when generating fixity data
  def bag_checksum_types=(*types)
    @bag_checksum_types = Fixity.validate_checksum_types(*types)
  end

  # @param [Symbol] link_mode Specifies whether to :copy, :link, or :symlink the files to the payload directory
  # @param [Pathname,String] source_dir The source location of the directory whose contents are to be ingested
  # @return [Pathname] Generate file_fixity_hash and send it to #add_payload_files
  def add_payload_dir(link_mode, source_dir)
    source_dir = Pathname(source_dir)
    file_fixity_hash = Fixity.generate_checksums(source_dir, nil, bag_checksum_types)
    add_payload_files(link_mode, source_dir, file_fixity_hash)
    payload_pathname
  end

  # @param [Symbol] link_mode Specifies whether to :copy, :link, or :symlink the files to the payload directory
  # @param [Pathname,String] source_basepath The source location of the directory whose contents are to be ingested
  # @param [Hash<String,FileFixity>] file_fixity_hash The list of files (with fixity data) to be added to the payload
  # @return [Pathname] Copy or link the files specified in the file_fixity_hash to the payload directory,
  #   then update the payload manifest files
  def add_payload_files(link_mode, source_basepath, file_fixity_hash)
    # Accept a String as well as a Pathname, like the other path-taking methods of this class
    source_basepath = Pathname(source_basepath)
    file_fixity_hash.each_key do |file_id|
      copy_file(link_mode, source_basepath.join(file_id), payload_pathname.join(file_id))
    end
    write_manifest_checksums('manifest', file_fixity_hash)
    payload_pathname
  end

  # @param [Symbol] link_mode Specifies whether to :copy, :link, or :symlink the file (nil is treated as :copy)
  # @param [Pathname] source_pathname The source location of the file to be ingested
  # @param [Pathname] target_pathname The full path of the file to be created (parent folders are created as needed)
  # @return [Pathname] link or copy the specified file from source location to the target location
  # @raise [RuntimeError] if link_mode is not one of :copy, :link, :symlink (or nil)
  def copy_file(link_mode, source_pathname, target_pathname)
    target_pathname.parent.mkpath
    case link_mode
    when :copy, nil
      FileUtils.copy(source_pathname.to_s, target_pathname.to_s) # automatically dereferences symlinks
    when :link
      FileUtils.link(source_pathname.to_s, target_pathname.to_s) #, :force => true (false is default)
    when :symlink
      FileUtils.symlink(source_pathname.to_s, target_pathname.to_s) #, :force => true (false is default)
    else
      raise "Invalid link_mode: #{link_mode}, expected one of [:copy,:link,:symlink]"
    end
    target_pathname
  end

  # @param [String] tarfile_id The name to give the tar archive inside the payload directory
  # @param [Pathname,String] source_fullpath The location of the directory whose content will be tarred
  # @param [Pathname,String] source_basepath The location of the directory to change to before doing the tar create
  # @return [Tarfile] Create a tar archive of a directory into the payload directory,
  #   then generate checksums for the archive and record those checksums in the payload manifests
  def add_payload_tarfile(tarfile_id, source_fullpath, source_basepath)
    tarfile = Tarfile.new
    tarfile.source_basepath = Pathname(source_basepath)
    tarfile.source_fullpath = Pathname(source_fullpath)
    tarfile.tarfile_basepath = payload_pathname
    tarfile.tarfile_fullpath = payload_pathname.join(tarfile_id.to_s)
    tarfile.create_tarfile
    file_fixity_hash = Fixity.generate_checksums(tarfile.tarfile_basepath, [tarfile.tarfile_fullpath], bag_checksum_types)
    write_manifest_checksums('manifest', file_fixity_hash)
    tarfile
  end

  # @return [Pathname] Generate the bag-info.txt tag file to record the payload size
  def write_bag_info_txt
    payload_size = bag_payload_size
    bag_info_txt = bag_pathname.join("bag-info.txt")
    bag_info_txt.open('w') do |f|
      f.puts "External-Identifier: #{bag_pathname.basename}"
      f.puts "Payload-Oxum: #{payload_size[:bytes]}.#{payload_size[:files]}"
      f.puts "Bag-Size: #{bag_size_human(payload_size[:bytes])}"
    end
    bag_info_txt
  end

  # @return [Hash<Symbol,Integer>] A hash containing the payload size in bytes, and the number of files,
  #   derived from the payload directory contents
  def bag_payload_size
    payload_pathname.find.select { |f| f.file? }.each_with_object({bytes: 0, files: 0}) do |file, totals|
      totals[:bytes] += file.size
      totals[:files] += 1
    end
  end

  # @param [Integer] bytes The total number of bytes in the payload
  # @return [String] Human-readable rendition of the total payload size
  def bag_size_human(bytes)
    count = 0
    size = bytes
    # Stop at the largest known unit even if the value is still >= 1024
    while size >= 1024 && count < SIZE_UNITS.size - 1
      size /= 1024.0
      count += 1
    end
    return sprintf("%d B", size) if count == 0
    sprintf("%.2f %s", size, SIZE_UNITS[count])
  end

  # @return [Hash<String,String>] A hash containing the properties documented in the bag-info.txt tagfile
  def read_bag_info_txt
    read_tag_file(bag_pathname.join("bag-info.txt"))
  end

  # @return [Hash<Symbol,Integer>] A hash containing the payload size in bytes, and the number of files,
  #   derived from the Payload-Oxum property
  # @raise [RuntimeError] if bag-info.txt has no Payload-Oxum property
  def info_payload_size
    oxum = read_bag_info_txt['Payload-Oxum']
    raise "Payload-Oxum not found in #{bag_pathname.join('bag-info.txt')}" unless oxum
    bytes, files = oxum.split('.')
    {:bytes => bytes.to_i, :files => files.to_i}
  end

  # @return [Boolean] Compare the actual measured payload size against the value recorded in bag-info.txt
  # @raise [RuntimeError] if the recorded and measured sizes differ
  def verify_payload_size
    info_size = info_payload_size
    bag_size = bag_payload_size
    if info_size != bag_size
      raise "Failed payload size verification! Expected: #{info_size}, Found: #{bag_size}"
    end
    true
  end

  # @return [Hash<String,FileFixity>] create hash containing ids and checksums for all files in the bag's root directory
  def generate_tagfile_checksums
    # The tag manifests cannot list themselves, so they are excluded from the digest run
    tagfiles = bag_pathname.children.reject { |file| file.basename.to_s.start_with?('tagmanifest') }
    Fixity.generate_checksums(bag_pathname, tagfiles, bag_checksum_types)
  end

  # @return [Hash<String,FileFixity>] create hash containing ids and checksums for all files in the bag's payload
  def generate_payload_checksums
    Fixity.generate_checksums(payload_pathname, nil, bag_checksum_types)
  end

  # @param [String] manifest_type The type of manifest file ('manifest' or 'tagmanifest') to be updated
  # @param [Hash<String,FileFixity>] file_fixity_hash A hash containing file ids and fixity data
  # @param [String] open_mode The file open mode (default is 'a')
  # @return [Hash<Symbol,Pathname>] Update each of the manifests with data from the file_fixity_hash
  def write_manifest_checksums(manifest_type, file_fixity_hash, open_mode='a')
    bag_checksum_types.each_with_object({}) do |checksum_type, manifests|
      manifest_pathname = bag_pathname.join("#{manifest_type}-#{checksum_type}.txt")
      # Block form guarantees the manifest is closed even if a fixity lookup raises
      manifest_pathname.open(open_mode) do |manifest_file|
        file_fixity_hash.each_value do |fixity|
          checksum = fixity.get_checksum(checksum_type)
          manifest_file.puts("#{checksum} #{fixity.file_id}") if checksum
        end
      end
      manifests[checksum_type] = manifest_pathname
    end
  end

  # @param [String] manifest_type The type of manifest file ('manifest' or 'tagmanifest') to be read
  # @return [Hash<String,FileFixity>] A hash containing file ids and fixity data derived from the manifest files.
  #   As a side effect, the checksum types found on disk are merged into #bag_checksum_types.
  def read_manifest_files(manifest_type)
    file_fixity_hash = {}
    checksum_type_list = []
    Fixity.valid_checksum_ids.each do |checksum_type|
      manifest_pathname = bag_pathname.join("#{manifest_type}-#{checksum_type}.txt")
      next unless manifest_pathname.file?
      checksum_type_list << checksum_type
      manifest_pathname.readlines.each do |line|
        # Non-bang strip: String#chomp! returns nil when the line has no line terminator
        line = line.strip
        next if line.empty?
        checksum, file_id = line.split(/[\s*]+/, 2)
        file_fixity = (file_fixity_hash[file_id] ||= FileFixity.new(file_id: file_id))
        file_fixity.set_checksum(checksum_type, checksum)
      end
    end
    self.bag_checksum_types = bag_checksum_types | checksum_type_list
    file_fixity_hash
  end

  # @return [Boolean] Compare fixity data from the tag manifest files against the values measured by digesting the files
  def verify_tagfile_manifests
    manifest_type = 'tagmanifest'
    manifest_fixity_hash = read_manifest_files(manifest_type)
    bag_fixity_hash = generate_tagfile_checksums
    verify_manifests(manifest_type, manifest_fixity_hash, bag_fixity_hash)
  end

  # @return [Boolean] Compare fixity data from the payload manifest files against the values measured by digesting the files
  def verify_payload_manifests
    manifest_type = 'manifest'
    manifest_fixity_hash = read_manifest_files(manifest_type)
    bag_fixity_hash = generate_payload_checksums
    verify_manifests(manifest_type, manifest_fixity_hash, bag_fixity_hash)
  end

  # @param [String] manifest_type The type of manifest file ('manifest' or 'tagmanifest') to be read
  # @param [Hash<String,FileFixity>] manifest_fixity_hash A hash containing file ids and fixity data derived from the manifest files
  # @param [Hash<String,FileFixity>] bag_fixity_hash A hash containing file ids and fixity data derived from the actual files
  # @return [Boolean] Compare fixity data from the manifest files against the values measured by digesting the files,
  #   returning true if equal
  # @raise [RuntimeError] listing the differences if the two sets of fixity data are not equal
  def verify_manifests(manifest_type, manifest_fixity_hash, bag_fixity_hash)
    diff = manifest_diff(manifest_fixity_hash, bag_fixity_hash)
    if diff.size > 0
      raise "Failed #{manifest_type} verification! Differences: \n#{diff.inspect}"
    end
    true
  end

  # @param [Hash<String,FileFixity>] manifest_fixity_hash A hash containing file ids and fixity data derived from the manifest files
  # @param [Hash<String,FileFixity>] bag_fixity_hash A hash containing file ids and fixity data derived from the actual files
  # @return [Hash] A report of the differences between the fixity data from the manifest files
  #   against the values measured by digesting the files
  def manifest_diff(manifest_fixity_hash, bag_fixity_hash)
    (manifest_fixity_hash.keys | bag_fixity_hash.keys).each_with_object({}) do |file_id, diff|
      # A file missing on one side is compared against an empty fixity record for the same id
      manifest_fixity = manifest_fixity_hash[file_id] || FileFixity.new(file_id: file_id)
      bag_fixity = bag_fixity_hash[file_id] || FileFixity.new(file_id: file_id)
      if manifest_fixity != bag_fixity
        diff[file_id] = manifest_fixity.diff(bag_fixity, 'manifest', 'bag')
      end
    end
  end

  # @return [Boolean] Validate the bag containing the digital object
  # @raise [RuntimeError] from the first verification step that fails
  def verify_bag
    verify_bag_structure
    verify_tagfile_manifests
    verify_payload_size
    verify_payload_manifests
    true
  end

  # @return [Boolean] Test the existence of expected files, return true if files exist, raise exception if not
  def verify_bag_structure
    required_files = ['data','bagit.txt','bag-info.txt','manifest-sha256.txt','tagmanifest-sha256.txt']
    required_files.each { |filename| verify_pathname(bag_pathname.join(filename)) }
    true
  end

  # @param [Pathname] pathname The file whose existence should be verified
  # @return [Boolean] Test the existence of the specified path. Return true if file exists, raise exception if not
  def verify_pathname(pathname)
    raise "#{pathname.basename} not found at #{pathname}" unless pathname.exist?
    true
  end

  private

  # Parse a "Key: value" tag file (bagit.txt, bag-info.txt) into a hash.
  # Lines without a colon (including blank lines) are ignored.
  # @param [Pathname] tagfile_pathname The location of the tag file
  # @return [Hash<String,String>] The properties found in the tag file
  def read_tag_file(tagfile_pathname)
    tagfile_pathname.readlines.each_with_object({}) do |line, properties|
      # Non-bang strip: String#chomp! returns nil when the line has no line terminator
      key, value = line.strip.split(':', 2)
      properties[key.strip] = value.strip if value
    end
  end

end
|
335
|
+
|
336
|
+
|
337
|
+
end
|