|
// Copyright (C) 2018 Andreas Romeyke (art1@andreas-romeyke.de), 2018.
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
#include "bag.hpp"
|
|
#include <boost/filesystem.hpp>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include "payload.hpp"
|
|
#include <sstream>
|
|
#include <string>
|
|
//#include <filesystem> // c++17
|
|
|
|
//namespace fs = std::filesystem;
|
|
namespace fs = boost::filesystem;
|
|
using namespace std;
|
|
|
|
|
|
Bag::Bag( string dfname ) {
|
|
// log << "load constructor (" << dfname << ")" << endl;
|
|
// read in file bagit.txt
|
|
string bagit_txt_path = dfname + "bagit.txt";
|
|
//log << "parse " << bagit_txt_path << endl;
|
|
ifstream bagit_txt_file;
|
|
bagit_txt_file.open( bagit_txt_path );
|
|
if (bagit_txt_file.is_open()) {
|
|
string version_line;
|
|
string utf8_line;
|
|
getline(bagit_txt_file, version_line);
|
|
getline(bagit_txt_file, utf8_line);
|
|
bagit_txt_file.close();
|
|
stringstream version_ss ( version_line );
|
|
string major;
|
|
string minor;
|
|
string vprefix;
|
|
getline(version_ss, vprefix, ' ');
|
|
getline(version_ss, major ,'.');
|
|
getline(version_ss, minor, '.');
|
|
if (0 != vprefix.compare("BagIt-Version:")) {
|
|
// log << "wrong vprefix='" << vprefix << "', but 'BagIt-Version:' expected" << endl;
|
|
}
|
|
//log << "major:'"<<major<<"'"<<endl;
|
|
//log << "minor:'"<<minor<<"'"<<endl;
|
|
Bag::bagit_version_major = stoi(major);
|
|
Bag::bagit_version_minor = stoi(minor);
|
|
|
|
stringstream utf8_ss (utf8_line);
|
|
string uprefix;
|
|
string uvalue;
|
|
getline(utf8_ss, uprefix, ' ');
|
|
getline(utf8_ss, uvalue, ' ');
|
|
if (0 != uprefix.compare("Tag-File-Character-Encoding:")) {
|
|
//log << "wrong uprefix='" << uprefix << "', but 'Tag-File-Character-Encoding:' expected" << endl;
|
|
}
|
|
Bag::tag_file_character_encoding = uvalue;
|
|
//log << "Bagit Version ("<< version_line << ") major=" << Bag::bagit_version_major << " minor=" << Bag::bagit_version_minor << endl;
|
|
} else {
|
|
Bag::log << "file " << bagit_txt_path << " could not be opened" <<endl;
|
|
}
|
|
// read in payload
|
|
Bag::payload_p = new Payload( dfname ) ;
|
|
list<string> files = Bag::payload_p->get_all_relative_paths();
|
|
// read in payload manifest
|
|
Bag::payloadmanifest_p = new Payloadmanifest(dfname);
|
|
// read in tagmanifest
|
|
Bag::tagmanifest_p = new Tagmanifest(dfname);
|
|
// read in baginfo
|
|
Bag::bagmetadata_p = new Bagmetadata(dfname);
|
|
map<string,string> md = Bag::bagmetadata_p->get_metadata();
|
|
map<string,string>::iterator m;
|
|
for (m=md.begin(); m!=md.end(); m++) {
|
|
// log << m->first << " = " << m->second <<endl;
|
|
}
|
|
|
|
list<string>::iterator i;
|
|
for (i=files.begin(); i!=files.end(); i++) {
|
|
// log << "file/dir (rel):" << (*i) << endl;
|
|
//log << "file/dir (abs):" << (*i) << endl;
|
|
}
|
|
Bag::payloadmanifest_p->get_checksum_file_pairs( md5 );
|
|
Bag::tagmanifest_p->get_checksum_file_pairs( md5 );
|
|
|
|
}
|
|
|
|
// Returns the names of the tag files that belong to the bag itself.
// Currently this is always the fixed pair "bagit.txt" and "bag-info.txt".
list<string> Bag::get_all_bag_files() {
    list<string> bag_files{ "bagit.txt", "bag-info.txt" };
    // TODO(art1): when this->tagmanifest_p is set, also add
    // this->tagmanifest_p->get_all_checksum_files()
    return bag_files;
}
|
|
|
|
// Writes the bag into the new directory `basedir`.
//
// Returns false, with a reason in the bag's log, when
//   - `basedir` already exists as a directory,
//   - the payload, payload manifest, bag metadata or tag manifest object
//     is missing,
//   - the directory cannot be created, or
//   - "bagit.txt" cannot be written.
// Returns true once all parts have been handed to their store() functions.
// The fetch file and other tags are optional: a missing object is only
// logged.
//
// All mandatory objects are checked before the file system is touched, so a
// bag that cannot be stored does not leave a partially written directory.
bool Bag::store( string basedir ) {
    const fs::path bag_dir{ basedir };
    if (fs::is_directory( fs::status( bag_dir ) )) {
        log << "directory '" << basedir << "' already exists" << endl;
        return false;
    }

    if (nullptr == Bag::payload_p) {
        log << "Payload object needed" << endl;
        return false;
    }
    if (nullptr == Bag::payloadmanifest_p) {
        log << "Payloadmanifest object needed" << endl;
        return false;
    }
    if (nullptr == Bag::bagmetadata_p) {
        log << "Bagmetadata object needed" << endl;
        return false;
    }
    if (nullptr == Bag::tagmanifest_p) {
        log << "Tagmanifest object needed" << endl;
        return false;
    }

    // error_code overload: report a failure through the return value and the
    // log instead of throwing fs::filesystem_error
    boost::system::error_code create_error;
    fs::create_directory( bag_dir, create_error );
    if (create_error) {
        log << "directory '" << basedir << "' could not be created: " << create_error.message() << endl;
        return false;
    }

    // store payload
    Bag::payload_p->store( basedir );

    // store payload manifest
    list<string> payload_files = Bag::payload_p->get_all_relative_paths();
    log << "PAYLOAD" << endl;
    Bag::payloadmanifest_p->store( basedir, payload_files );

    // store baginfo
    Bag::bagmetadata_p->store( basedir );

    // store fetchfile (if needed)
    if (nullptr == Bag::fetchfile_p) {
        log << "Fetchfile object needed" << endl;
    } else {
        Bag::fetchfile_p->store( basedir );
    }

    // store other
    if (nullptr == Bag::othertags_p) {
        log << "Othertags object needed" << endl;
    } else {
        Bag::othertags_p->store( basedir );
    }

    // store bag itself
    // fs::path only inserts a separator when basedir does not end with one
    const string bagit_txt_path = (bag_dir / "bagit.txt").string();
    ofstream bagit_txt_file( bagit_txt_path );
    if (!bagit_txt_file.is_open()) {
        log << "file " << bagit_txt_path << " could not be open for writing" << endl;
        return false;
    }
    bagit_txt_file << ("BagIt-Version: " + to_string(Bag::bagit_version_major) + "." + to_string(Bag::bagit_version_minor)) << endl;
    bagit_txt_file << ("Tag-File-Character-Encoding: " + Bag::tag_file_character_encoding) << endl;
    bagit_txt_file.close();
    if (bagit_txt_file.fail()) {
        log << "file " << bagit_txt_path << " could not be written" << endl;
        return false;
    }

    // the tag manifest is stored last, after the tag files it lists exist
    list<string> bagfiles = Bag::get_all_bag_files();
    log << "TAGMANIFEST" << endl;
    for (const string & tag_file : bagfiles) {
        log << "TAGMANIFEST-file '" << tag_file << "'" << endl;
    }
    Bag::tagmanifest_p->store( basedir, bagfiles );

    return true;
}
|
|
|
|
|
|
bool Bag::validate() {
|
|
bool is_valid = true;
|
|
if (this->bagit_version_major != 0) {
|
|
this->log << "Bagit major version 0 is expected, but got: " << to_string(this->bagit_version_major) << endl;
|
|
is_valid = false;
|
|
}
|
|
if (this->bagit_version_minor != 97) {
|
|
this->log << "Bagit minor version 97 is expected, but got: " << to_string(this->bagit_version_minor) << endl;
|
|
is_valid = false;
|
|
}
|
|
if (0 != tag_file_character_encoding.compare( "UTF-8" )) {
|
|
this->log << "Bagit character encoding UTF-8 is expected, but got: " << this->tag_file_character_encoding << endl;
|
|
is_valid = false;
|
|
}
|
|
if (nullptr == this->payload_p) {
|
|
this->log << "Bagit payload directory 'data/' is expected, but could not found" << endl;
|
|
is_valid = false;
|
|
} else {
|
|
bool ret = this->payload_p->validate();
|
|
if (!ret) {
|
|
is_valid = false;
|
|
}
|
|
}
|
|
if (nullptr == this->payloadmanifest_p) {
|
|
is_valid = false;
|
|
} else {
|
|
// checksums check
|
|
this->log << "Bagit payload manifest" << endl;
|
|
bool ret = this->payloadmanifest_p->validate();
|
|
if (!ret) {
|
|
is_valid = false;
|
|
}
|
|
if (nullptr != this->payload_p) {
|
|
// check if payload checksums missed for payload files
|
|
// HINT: not requested by draft, therefore only a warning
|
|
list<string> payload_files = this->payload_p->get_all_relative_paths();
|
|
list<string> payload_manifest_files = this->payloadmanifest_p->get_checksummed_files();
|
|
list<string> missed_files;
|
|
payload_files.sort();
|
|
payload_manifest_files.sort();
|
|
//log << "PAYLOAD_FILES:" << endl;
|
|
//auto it = payload_files.begin();
|
|
//while( it != payload_files.end()) {
|
|
// log << "\t"<<(*it++) << endl;
|
|
//}
|
|
|
|
|
|
//log << "PAYLOADMANIFEST_FILES:" << endl;
|
|
//it = payload_manifest_files.begin();
|
|
//while( it != payload_manifest_files.end()) {
|
|
// log << "\t"<<(*it++) << endl;
|
|
//}
|
|
auto it1 = payload_files.begin();
|
|
auto it2 = payload_manifest_files.begin();
|
|
while( it1 != payload_files.end() && it2 != payload_manifest_files.end() ) {
|
|
int cmp_res = (*it1).compare( *it2);
|
|
//log << "COMP: "<<cmp_res<<" file='"<< (*it1) << "' checksummed file='" << (*it2) << "'" << endl;
|
|
if ( cmp_res < 0) {
|
|
this->log << "Bagit warning, file '" << (*it1) << "' in payload has no checksum entry in payload manifest" << endl;
|
|
it1++;
|
|
} else if ( cmp_res > 0) {
|
|
it2++;
|
|
} else {
|
|
it1++;
|
|
it2++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// next elements are optional
|
|
if (nullptr == this->tagmanifest_p) {
|
|
} else {
|
|
this->log << "Bagit tag manifest" << endl;
|
|
bool ret = this->tagmanifest_p->validate();
|
|
if (!ret) {
|
|
is_valid = false;
|
|
}
|
|
}
|
|
if (nullptr == this->bagmetadata_p) {
|
|
} else {
|
|
bool ret = this->bagmetadata_p->validate();
|
|
if (!ret) {
|
|
is_valid = false;
|
|
}
|
|
if (this->bagmetadata_p->has_PayloadOxum()) {
|
|
// check oxum of payload
|
|
Checksum c;
|
|
if (this->payload_p != nullptr) {
|
|
list<string> files = this->payload_p->get_all_absolute_paths();
|
|
oxum_t expected_oxum = this->bagmetadata_p->get_PayloadOxum();
|
|
oxum_t calculated_oxum = c.oxum_of_filelist( files );
|
|
if (expected_oxum.octetcount != calculated_oxum.octetcount) {
|
|
this->log << "Bagit payload oxum octectcount=" << to_string(expected_oxum.octetcount) << " expected, but " << to_string(calculated_oxum.octetcount) << " found" << endl;
|
|
is_valid = false;
|
|
}
|
|
if (expected_oxum.streamcount != calculated_oxum.streamcount) {
|
|
this->log << "Bagit payload oxum streamcount=" << to_string(expected_oxum.streamcount) << " expected, but " << to_string(calculated_oxum.streamcount) << " found" << endl;
|
|
is_valid = false;
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
/*
|
|
if (NULL == this->fetchfile_p) {
|
|
} else {
|
|
bool ret = this->fetchfile_p->validate( log );
|
|
if (ret == false) {
|
|
is_valid = false;
|
|
}
|
|
}
|
|
|
|
if (NULL == this->othertags_p) {
|
|
} else {
|
|
bool ret = this->othertags_p->validate( log);
|
|
if (ret == false) {
|
|
is_valid = false;
|
|
}
|
|
}
|
|
*/
|
|
|
|
return is_valid;
|
|
}
|
|
|
|
// Appends the not yet fetched part of the bag's log to the caller's stream
// `log`. The text is read through the stream buffer of the bag's log, so a
// later call only delivers what was logged in the meantime.
void Bag::get_logstream( stringstream & log ) {
    auto * pending = this->log.rdbuf();
    // Inserting a stream buffer that delivers no character sets failbit on
    // the target stream, which would make the caller's later writes fail
    // silently. Skip the insertion when nothing is waiting to be read.
    if (pending->sgetc() == stringstream::traits_type::eof()) {
        return;
    }
    log << pending;
}
|
|
|
|
void Bag::reset_logstream() {
|
|
this->log.str(std::string());
|
|
}
|
|
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab
|
|
|