C++ library to handle BagIt structures. BagIt is a standard format for creating transfer packages for digital preservation purposes. See https://en.wikipedia.org/wiki/BagIt for details. http://andreas-romeyke.de
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

181 lines
7.1 KiB

// Copyright (C) 2018 Andreas Romeyke (art1@andreas-romeyke.de), 2018.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
#include "manifest.hpp"
#include <iostream>
#include <fstream>
#include <sstream>
#include <boost/filesystem.hpp>
//#include <filesystem> // c++17
//namespace fs = std::filesystem;
namespace fs = boost::filesystem;
using namespace std;
// Builds a Manifest for the bag rooted at `basedir`.
//
// For every algorithm in checksum_algorithmsList the candidate manifest path
// `basedir + file_prefix + string_of_algorithm(alg) + ".txt"` is probed.
// Candidates that are regular files are remembered in
// manifest_algorithm_files, and exist_manifest_files is set to true as soon as
// one is found. Each probe is recorded in the log stream.
Manifest::Manifest( const string basedir, const string file_prefix ) {
    this->basedir = basedir;
    this->base_manifest_file_prefix = file_prefix;
    this->exist_manifest_files = false;

    // Candidate manifest path per algorithm; the map keeps the probing order
    // sorted by algorithm.
    map<checksum_algorithms, string> candidates;
    for (checksum_algorithms algorithm : checksum_algorithmsList) {
        candidates[algorithm] = basedir + this->base_manifest_file_prefix + string_of_algorithm( algorithm ) + ".txt";
    }

    for (const auto & candidate : candidates) {
        const fs::path candidate_path{ candidate.second };
        const bool is_file = fs::is_regular_file( fs::status( candidate_path ) );
        this->log << "path "<< candidate_path.string() << endl;
        this->log << "is file: "<< (is_file ? "yes" : "no") << endl;
        if (!is_file) {
            continue;
        }
        // Only manifests that really exist take part in later validation.
        this->manifest_algorithm_files[candidate.first] = candidate.second;
        this->exist_manifest_files = true;
    }
}
// Reads the manifest file registered for `alg` and returns its entries as
// (checksum, path) pairs.
//
// Each non-blank line is split into the first whitespace-delimited token (the
// checksum) and the remainder of the line (the path). The path keeps embedded
// blanks, so file names containing spaces survive a store()/read round trip;
// surrounding whitespace, including the '\r' of CRLF line endings, is
// stripped. Blank lines are skipped. A line carrying only a checksum yields an
// empty path, so that validate() still reports it.
//
// Returns an empty multimap when no manifest is registered for `alg` or the
// file cannot be opened.
multimap<checksum_string_t,filename_t> Manifest::get_checksum_file_pairs(checksum_algorithms alg) {
    multimap<checksum_string_t,filename_t> checksum_file_pairs;

    // find() instead of operator[]: a lookup must not insert an empty entry
    // for an algorithm that has no manifest file.
    const auto registered = this->manifest_algorithm_files.find( alg );
    if (registered == this->manifest_algorithm_files.end()) {
        return checksum_file_pairs;
    }

    ifstream file( registered->second );
    if (!file.is_open()) {
        return checksum_file_pairs;
    }

    const string whitespace = " \t\r\v\f";
    string line;
    while (getline( file, line )) {
        const string::size_type checksum_begin = line.find_first_not_of( whitespace );
        if (checksum_begin == string::npos) {
            continue; // blank line, nothing to record
        }
        const string::size_type checksum_end = line.find_first_of( whitespace, checksum_begin );
        const string checksum = (checksum_end == string::npos)
            ? line.substr( checksum_begin )
            : line.substr( checksum_begin, checksum_end - checksum_begin );

        string subfile;
        if (checksum_end != string::npos) {
            const string::size_type path_begin = line.find_first_not_of( whitespace, checksum_end );
            if (path_begin != string::npos) {
                // path_begin is a non-blank character, so path_last >= path_begin.
                const string::size_type path_last = line.find_last_not_of( whitespace );
                subfile = line.substr( path_begin, path_last - path_begin + 1 );
            }
        }
        checksum_file_pairs.insert( pair<checksum_string_t, filename_t>(checksum,subfile) );
    }
    return checksum_file_pairs;
}
bool Manifest::validate() {
Checksum checksum;
bool is_valid = true;
bool at_least_one_manifest = false;
this->log << "validate exist manifestfiles = '"<< (this->exist_manifest_files?"yes":"no") <<endl;
this->log << "validate count = '"<< (this->manifest_algorithm_files.size()) <<endl;
for (auto & manifest_algorithm_file : this->manifest_algorithm_files) {
string manifest_file = manifest_algorithm_file.second;
checksum_algorithms alg = manifest_algorithm_file.first;
fs::path p{ manifest_file };
if (fs::is_regular_file( p )) {
at_least_one_manifest=true;
}
this->log << "validate using file '"<< (manifest_file) <<endl;
multimap<checksum_string_t,filename_t>checksum_file_pairs = this->get_checksum_file_pairs( alg );
for (auto & checksum_file_pair : checksum_file_pairs) {
string expected_checksum = checksum_file_pair.first;
string file = this->basedir + checksum_file_pair.second;
string calc_checksum = checksum.checksum_of_file( file, alg );
if (calc_checksum.empty()) {
this->log << "Bagit file '" << file << "', checksum '" << expected_checksum << "' is expected by file '" << manifest_file << "', but file was not found" << endl;
is_valid = false;
}
else if (0 != expected_checksum.compare( calc_checksum)) {
this->log << "Bagit file '" << file << "', checksum '" << expected_checksum << "' is expected by file '" << manifest_file << "', but found: '" << calc_checksum << "'" << endl;
is_valid = false;
}
fs::path p{ file };
// fs::file_status s = fs::status( p );
if (! fs::is_regular_file( p )) {
this->log << "Bagit file '" << file << "' does not exists in '" << this->basedir << "'" << endl;
}
}
}
if (! at_least_one_manifest) { is_valid = false; } // at least one manifest is needed!
log << "MANIFESTLOG:" << this->log.str() << endl;
return is_valid;
}
// Collects the paths of all files listed in any registered manifest.
//
// Every path is prefixed with the bag's base directory. The result is sorted
// and contains each path once, even if several manifests list the same file.
list<string> Manifest::get_checksummed_files() {
    list<string> collected;
    for (const auto & registered : this->manifest_algorithm_files) {
        const multimap<checksum_string_t,filename_t> entries = this->get_checksum_file_pairs( registered.first );
        for (multimap<checksum_string_t,filename_t>::const_iterator entry = entries.begin(); entry != entries.end(); ++entry) {
            collected.push_back( this->basedir + entry->second );
        }
    }
    // unique() only removes adjacent duplicates, hence the sort first.
    collected.sort();
    collected.unique();
    return collected;
}
bool Manifest::store( const string& basedir, list<string>& files ) {
map<checksum_algorithms, string> possible_manifest_files;
for (checksum_algorithms alg : checksum_algorithmsList) {
possible_manifest_files[alg] = Manifest::base_manifest_file_prefix + string_of_algorithm( alg ) + ".txt";
}
for (auto file : files) {
log << "DEBUG manifest-files file='" << file << "'" << endl;
}
Checksum checksum;
for (auto & possible_manifest_file : possible_manifest_files) {
checksum_algorithms alg = possible_manifest_file.first;
log << "DEBUG manifest::store alg=" << string_of_algorithm(alg) << endl;
if (!possible_manifest_file.second.empty()) {
// test if file exists
string algfilename = basedir + possible_manifest_file.second;
log << "DEBUG manifest::store, alg=" << string_of_algorithm(alg) << " file='"<<algfilename << "' (newbase='" << basedir << "', filename='" << possible_manifest_file.second << "'" <<endl;
ofstream alg_txt_file;
alg_txt_file.open( algfilename );
if (alg_txt_file.is_open()) {
list<string>::iterator ch;
for (ch=files.begin(); ch!=files.end(); ++ch) {
string filename = basedir + *ch;
log << "DEBUG manifest::store\tfilename='"<<filename<<"'"<<endl;
string checksum_string = checksum.checksum_of_file( filename, alg);
alg_txt_file << checksum_string << " " << *ch << endl;
}
alg_txt_file.close();
}
}
}
return true;
}
// Appends the not-yet-read part of this object's log to `log`.
//
// The content is transferred through the internal stream buffer, so what has
// been handed out once is not handed out again by a later call.
//
// Inserting a stream buffer that yields no characters sets failbit on the
// destination stream, which would make the caller's later writes to `log`
// fail silently. The buffer is therefore only inserted when it has at least
// one character pending.
void Manifest::get_logstream( stringstream & log ) {
    streambuf * pending = this->log.rdbuf();
    // sgetc() peeks at the next character without consuming it and without
    // touching the state flags of this->log.
    if (pending->sgetc() != char_traits<char>::eof()) {
        log << pending;
    }
}
void Manifest::reset_logstream() {
this->log.str(std::string());
}
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab