Browse Source

- changed signature of validate() methods

- added checksum class, uses openssl
master
parent
commit
a0c5797fe8
16 changed files with 186 additions and 102 deletions
  1. +1
    -1
      src/include/bag.hpp
  2. +2
    -1
      src/include/bagmetadata.hpp
  3. +15
    -0
      src/include/checksum.hpp
  4. +4
    -2
      src/include/manifest.hpp
  5. +1
    -1
      src/include/payload.hpp
  6. +0
    -2
      src/include/payloadmanifest.hpp
  7. +0
    -1
      src/include/tagmanifest.hpp
  8. +3
    -3
      src/lib/bagmetadata.cpp
  9. +45
    -0
      src/lib/checksum.cpp
  10. +9
    -16
      src/lib/load_bag.cpp
  11. +21
    -5
      src/lib/manifest.cpp
  12. +6
    -6
      src/lib/payload.cpp
  13. +6
    -4
      src/lib/payloadmanifest.cpp
  14. +1
    -1
      src/lib/tagmanifest.cpp
  15. +65
    -59
      src/lib/validate_bag.cpp
  16. +7
    -0
      src/test/test_bag.cpp

+ 1
- 1
src/include/bag.hpp View File

@ -31,7 +31,7 @@ class Bag {
Bag();
Bag(string dfname);
bool store( string dfname);
bool validate();
bool validate( list<string> & log );
};
#endif
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab

+ 2
- 1
src/include/bagmetadata.hpp View File

@ -2,6 +2,7 @@
#define LIBCBAG_BAGMETADATA
#include <string>
#include <map>
#include <list>
using namespace std;
enum {
@ -30,7 +31,7 @@ class Bagmetadata{
Bagmetadata( string basedir );
bool has_bagmetadata();
map<string,string> get_metadata();
bool validate();
bool validate( list<string> & log );
};
#endif


+ 15
- 0
src/include/checksum.hpp View File

@ -1,5 +1,20 @@
#ifndef LIBCBAG_CHECKSUM
#define LIBCBAG_CHECKSUM
#include <list>
#include <string>
#include <openssl/md5.h>
#include <openssl/sha.h>
using namespace std;
enum checksum_algorithms {md5=1, sha1};
// Computes checksums of files for the algorithms listed in checksum_algorithms.
class Checksum {
public:
// The three declarations below are commented out, i.e. not part of the class.
//Checksum();
//list<string> get_supported_checksum_algorithms();
//bool is_algorithm_supported( string alg );
// Returns the digest of the file at `filepath` for algorithm `alg` as a string.
string checksum_of_file(string filepath, checksum_algorithms alg);
};
#endif
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab

+ 4
- 2
src/include/manifest.hpp View File

@ -2,15 +2,17 @@
#define LIBCBAG_MANIFEST
#include <string>
#include <map>
#include <list>
#include "checksum.hpp"
using namespace std;
// Base class for the bag's manifests: holds the bag base directory and, per
// checksum algorithm, the manifest file to read.
class Manifest{
private:
protected:
// Directory prefix of the bag.
string basedir;
// Manifest file registered for each checksum algorithm.
map<checksum_algorithms,string> manifest_algorithm_files;
public:
// Returns the checksum/file pairs for the given algorithm.
virtual map<string,string> get_checksum_file_pairs(checksum_algorithms algorithm);
// NOTE(review): both a no-argument and a log-taking validate() appear here;
// the change description says the validate() signature was changed, so
// presumably only the log-taking overload remains — confirm.
virtual bool validate();
// Validates the manifest; returns whether it is valid and receives messages in `log`.
virtual bool validate( list<string> & log );
};
#endif


+ 1
- 1
src/include/payload.hpp View File

@ -16,7 +16,7 @@ class Payload{
Payload( string basedir );
list<string> get_all_relative_paths();
list<string> get_all_absolute_paths();
bool validate();
bool validate(list<string> & log);
};
#endif
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab


+ 0
- 2
src/include/payloadmanifest.hpp View File

@ -8,8 +8,6 @@
using namespace std;
class Payloadmanifest : public Manifest {
private:
map<checksum_algorithms,string> manifest_algorithm_files;
public:
Payloadmanifest( string basedir );
};


+ 0
- 1
src/include/tagmanifest.hpp View File

@ -9,7 +9,6 @@ using namespace std;
class Tagmanifest : public Manifest {
private:
map<checksum_algorithms,string> manifest_algorithm_files;
bool exist_manifest_files;
public:
Tagmanifest( string basedir );


+ 3
- 3
src/lib/bagmetadata.cpp View File

@ -76,8 +76,8 @@ map <string,string> Bagmetadata::get_metadata() {
return this->metadata;
}
bool Bagmetadata::validate() {
// TODO
return true;
// Placeholder validation of the bag metadata: performs no checks yet, always
// returns true, and leaves `log` untouched.
bool Bagmetadata::validate(list<string> & log ) {
// TODO
return true;
}
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab

+ 45
- 0
src/lib/checksum.cpp View File

@ -0,0 +1,45 @@
#include <iostream>
#include <iomanip>
#include <fstream>
#include <sstream>
#include <string>
#include "checksum.hpp"
/**
 * Compute the digest of a file's contents and return it as lowercase hex.
 *
 * The whole file is read into memory and hashed in one call to OpenSSL's
 * MD5() or SHA1().
 *
 * @param filepath path of the file to hash
 * @param alg      checksum algorithm to use (md5 or sha1)
 * @return hex-encoded digest; an empty string if the file could not be
 *         opened or read, or if `alg` is not one of the handled values
 *         (a diagnostic is printed to stdout for open/read failures)
 */
string Checksum::checksum_of_file(string filepath, checksum_algorithms alg) {
    stringstream hex_result;

    // Binary mode so the digest covers the raw bytes on every platform;
    // ios::ate positions the stream at the end so tellg() yields the size.
    ifstream file(filepath, ios::binary | ios::ate);
    if (!file.is_open()) {
        cout << "file '"<< filepath << "' could not be opened" << endl;
        return hex_result.str();
    }

    // tellg() reports failure as pos_type(-1); never use that as a size.
    const ifstream::pos_type end_pos = file.tellg();
    if (end_pos == ifstream::pos_type(-1)) {
        cout << "file '" << filepath << "' could not be read" << endl;
        return hex_result.str();
    }
    const size_t file_size = static_cast<size_t>(static_cast<streamoff>(end_pos));

    // The string owns the buffer, so it is released on every return path
    // (the previous raw new[] allocation was never freed).
    string buffer(file_size, '\0');
    file.seekg(0, ios::beg);
    if (file_size > 0 && !file.read(&buffer[0], static_cast<streamsize>(file_size))) {
        cout << "file '" << filepath << "' could not be read" << endl;
        return hex_result.str();
    }
    const unsigned char * data = reinterpret_cast<const unsigned char *>(buffer.data());

    // Appends each digest byte as two zero-padded hex characters.
    auto append_hex = [&hex_result](const unsigned char * digest, size_t length) {
        for (size_t i = 0; i < length; ++i) {
            hex_result << hex << setw(2) << setfill('0') << static_cast<int>(digest[i]);
        }
    };

    switch ( alg ) {
        case md5: {
            unsigned char digest[MD5_DIGEST_LENGTH];
            MD5(data, file_size, digest);
            append_hex(digest, MD5_DIGEST_LENGTH);
            break;
        }
        case sha1: {
            unsigned char digest[SHA_DIGEST_LENGTH];
            SHA1(data, file_size, digest);
            append_hex(digest, SHA_DIGEST_LENGTH);
            break;
        }
    }
    // The ifstream closes itself when it goes out of scope.
    return hex_result.str();
}
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab

+ 9
- 16
src/lib/load_bag.cpp View File

@ -6,10 +6,10 @@
#include <string>
Bag::Bag( string dfname ) {
cout << "load constructor (" << dfname << ")" << endl;
// cout << "load constructor (" << dfname << ")" << endl;
// read in file bagit.txt
string bagit_txt_path = dfname + "bagit.txt";
log << "parse " << bagit_txt_path << endl;
//log << "parse " << bagit_txt_path << endl;
ifstream bagit_txt_file;
bagit_txt_file.open( bagit_txt_path );
if (bagit_txt_file.is_open()) {
@ -26,10 +26,10 @@ Bag::Bag( string dfname ) {
getline(version_ss, major ,'.');
getline(version_ss, minor, '.');
if (0 != vprefix.compare("BagIt-Version:")) {
log << "wrong vprefix='" << vprefix << "', but 'BagIt-Version:' expected" << endl;
// log << "wrong vprefix='" << vprefix << "', but 'BagIt-Version:' expected" << endl;
}
cout << "major:'"<<major<<"'"<<endl;
cout << "minor:'"<<minor<<"'"<<endl;
//cout << "major:'"<<major<<"'"<<endl;
//cout << "minor:'"<<minor<<"'"<<endl;
Bag::bagit_version_major = stoi(major);
Bag::bagit_version_minor = stoi(minor);
@ -39,15 +39,10 @@ Bag::Bag( string dfname ) {
getline(utf8_ss, uprefix, ' ');
getline(utf8_ss, uvalue, ' ');
if (0 != uprefix.compare("Tag-File-Character-Encoding:")) {
log << "wrong uprefix='" << uprefix << "', but 'Tag-File-Character-Encoding:' expected" << endl;
//log << "wrong uprefix='" << uprefix << "', but 'Tag-File-Character-Encoding:' expected" << endl;
}
Bag::tag_file_character_encoding = uvalue;
cout << "Bagit Version ("<< version_line << ") major=" << Bag::bagit_version_major << " minor=" << Bag::bagit_version_minor << endl;
//cout << "Bagit Version ("<< version_line << ") major=" << Bag::bagit_version_major << " minor=" << Bag::bagit_version_minor << endl;
} else {
Bag::log << "file " << bagit_txt_path << " could not be opened" <<endl;
}
@ -64,19 +59,17 @@ Bag::Bag( string dfname ) {
map<string,string> md = Bag::bagmetadata_p->get_metadata();
map<string,string>::iterator m;
for (m=md.begin(); m!=md.end(); m++) {
cout << m->first << " = " << m->second <<endl;
// cout << m->first << " = " << m->second <<endl;
}
list<string>::iterator i;
for (i=files.begin(); i!=files.end(); i++) {
cout << "file/dir (rel):" << (*i) << endl;
// cout << "file/dir (rel):" << (*i) << endl;
//cout << "file/dir (abs):" << (*i) << endl;
}
Bag::payloadmanifest_p->get_checksum_file_pairs( md5 );
Bag::tagmanifest_p->get_checksum_file_pairs( md5 );
cout << "The bag '"<<dfname<<"' is " << (this->validate() ? "valid" : "invalid") << endl;
}
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab


+ 21
- 5
src/lib/manifest.cpp View File

@ -12,7 +12,7 @@ namespace fs = boost::filesystem;
using namespace std;
map<string,string> Manifest::get_checksum_file_pairs(checksum_algorithms alg) {
string filename = Manifest::manifest_algorithm_files[ alg ];
string filename = this->manifest_algorithm_files[ alg ];
map<string,string> checksum_file_pairs;
cout << "using file " << filename << endl;
ifstream file;
@ -26,15 +26,31 @@ map<string,string> Manifest::get_checksum_file_pairs(checksum_algorithms alg) {
line_ss >> checksum;
line_ss >> subfile;
checksum_file_pairs[checksum]=subfile;
cout << "checksum="<<checksum<<" file="<<subfile<<endl;
// cout << "checksum="<<checksum<<" file="<<subfile<<endl;
}
file.close();
}
return checksum_file_pairs;
}
bool Manifest::validate() {
// TODO
return true;
/**
 * Verify every file listed in every registered manifest against its
 * recorded checksum.
 *
 * For each (algorithm, manifest file) entry the checksum/file pairs are
 * loaded, each file's checksum is recomputed with that algorithm, and a
 * message is appended to `log` for every mismatch.
 *
 * @param log receives one message per file whose checksum does not match
 * @return true if all recomputed checksums equal the expected ones
 */
bool Manifest::validate( list<string> & log ) {
    Checksum hasher;
    bool all_match = true;
    for (const auto & manifest_entry : this->manifest_algorithm_files) {
        const checksum_algorithms algorithm = manifest_entry.first;
        cout << "validate using file "<< manifest_entry.second << " (" << algorithm << ")" <<endl;

        const map<string,string> expected_pairs = this->get_checksum_file_pairs( algorithm );
        for (const auto & pair : expected_pairs) {
            const string & expected_checksum = pair.first;
            // Manifest paths are relative to the bag, so prefix the base directory.
            const string file = this->basedir + pair.second;
            const string calc_checksum = hasher.checksum_of_file( file, algorithm );
            if (expected_checksum == calc_checksum) {
                continue;
            }
            log.push_back( "Bagit file '" + file + "', checksum '" + expected_checksum + "' is expected, but found: '" + calc_checksum + "'");
            all_match = false;
        }
    }
    return all_match;
}
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab

+ 6
- 6
src/lib/payload.cpp View File

@ -11,9 +11,9 @@ using namespace std;
// Constructs a Payload for the given directory and remembers it as basedir.
// The filesystem status is queried here only for the diagnostic output; the
// constructor does not reject a missing or non-directory path.
Payload::Payload( string basedir ) {
fs::path p{ basedir };
fs::file_status s = fs::status( p );
cout << "basedir "<< p.string() << endl;
cout << "is dir: "<< fs::is_directory( s) << endl;
cout << "exists: "<< fs::exists(s) << endl;
// cout << "basedir "<< p.string() << endl;
// cout << "is dir: "<< fs::is_directory( s) << endl;
// cout << "exists: "<< fs::exists(s) << endl;
this->basedir = basedir;
};
@ -49,8 +49,8 @@ list<string> Payload::get_all_absolute_paths() {
return strpaths;
}
bool Payload::validate() {
// TODO
return true;
// Placeholder validation of the payload: performs no checks yet, always
// returns true, and leaves `log` untouched.
bool Payload::validate( list<string> & log ) {
// TODO
return true;
}
// vim: set tabstop=4

+ 6
- 4
src/lib/payloadmanifest.cpp View File

@ -13,20 +13,22 @@ using namespace std;
// Constructs the payload manifest for a bag: remembers basedir and registers,
// per checksum algorithm, the manifest file that actually exists on disk.
Payloadmanifest::Payloadmanifest( string basedir ) {
Payloadmanifest::basedir = basedir;
// Candidate manifest file names, keyed by the algorithm they belong to.
map<checksum_algorithms, string> possible_manifest_files;
possible_manifest_files[md5] = "manifest-md5.txt";
possible_manifest_files[sha1] = "manifest-sha1.txt";
for (map<checksum_algorithms, string>::iterator it=possible_manifest_files.begin(); it!=possible_manifest_files.end(); ++it) {
// debug
cout << it->first << " => " << it->second << endl;
// test if file exists
// Plain concatenation: no path separator is inserted between basedir and the name.
string filename = basedir + it->second;
fs::path p{ filename };
fs::file_status s = fs::status( p );
cout << "path "<< p.string() << endl;
cout << "is file: "<< fs::is_regular_file( s) << endl;
// cout << "path "<< p.string() << endl;
// cout << "is file: "<< fs::is_regular_file( s) << endl;
// Only regular files are registered; the stored value is the basedir-prefixed path.
if (fs::is_regular_file( s)) {
Payloadmanifest::manifest_algorithm_files[it->first] = filename;
// debug
cout << "PAYLOADMANIFEST: alg="<< it->first << " => filename=" << it->second << endl;
cout << " alg="<< it->first << " => filename=" << Payloadmanifest::manifest_algorithm_files[it->first] << endl;
}
}
}


+ 1
- 1
src/lib/tagmanifest.cpp View File

@ -13,6 +13,7 @@ using namespace std;
Tagmanifest::Tagmanifest( string basedir ) {
Tagmanifest::basedir = basedir;
map<checksum_algorithms, string> possible_manifest_files;
possible_manifest_files[md5] = "tagmanifest-md5.txt";
possible_manifest_files[sha1] = "tagmanifest-sha1.txt";
@ -38,7 +39,6 @@ map<string,string> Tagmanifest::get_checksum_file_pairs(checksum_algorithms alg)
for (map<string,string>::iterator it=checksum_file_pairs.begin(); it!=checksum_file_pairs.end(); ++it) {
if (it->second.find( "data/" ) == 0) {
cout << "found 'data/' in the beginning of file '"<<it->second<<"' in tagmanifest file '"<< this->manifest_algorithm_files[ alg] << endl;
}
}
return checksum_file_pairs;


+ 65
- 59
src/lib/validate_bag.cpp View File

@ -3,67 +3,73 @@
bool Bag::validate() {
bool is_valid = true;
if (this->bagit_version_major != 0) {
is_valid = false;
}
if (this->bagit_version_minor != 97) {
is_valid = false;
}
if (0 != tag_file_character_encoding.compare( "UTF-8" )) {
is_valid = false;
}
if (NULL == this->payload_p) {
is_valid = false;
} else {
bool ret = this->payload_p->validate();
if (ret == false) {
is_valid = false;
}
}
if (NULL == this->payloadmanifest_p) {
is_valid = false;
} else {
bool ret = this->payloadmanifest_p->validate();
if (ret == false) {
is_valid = false;
}
}
// next elements are optional
if (NULL == this->tagmanifest_p) {
} else {
bool ret = this->tagmanifest_p->validate();
if (ret == false) {
is_valid = false;
}
}
if (NULL == this->bagmetadata_p) {
} else {
bool ret = this->bagmetadata_p->validate();
if (ret == false) {
is_valid = false;
}
}
/*
if (NULL == this->fetchfile_p) {
} else {
bool ret = this->fetchfile_p->validate();
if (ret == false) {
is_valid = false;
}
}
bool Bag::validate( list<string> & log ) {
bool is_valid = true;
if (this->bagit_version_major != 0) {
log.push_back( "Bagit major version 0 is expected, but got: " + to_string(this->bagit_version_major));
is_valid = false;
}
if (this->bagit_version_minor != 97) {
log.push_back( "Bagit minor version 97 is expected, but got: " + to_string(this->bagit_version_minor));
is_valid = false;
}
if (0 != tag_file_character_encoding.compare( "UTF-8" )) {
log.push_back( "Bagit character encoding UTF-8 is expected, but got: " + this->tag_file_character_encoding);
is_valid = false;
}
if (NULL == this->payload_p) {
log.push_back( "Bagit payload directory 'data/' is expected, but could not found");
is_valid = false;
} else {
bool ret = this->payload_p->validate( log );
if (ret == false) {
is_valid = false;
}
}
if (NULL == this->payloadmanifest_p) {
is_valid = false;
} else {
log.push_back( "Bagit payload manifest");
bool ret = this->payloadmanifest_p->validate( log );
if (ret == false) {
is_valid = false;
}
}
// next elements are optional
if (NULL == this->tagmanifest_p) {
} else {
log.push_back( "Bagit tag manifest");
bool ret = this->tagmanifest_p->validate( log );
if (ret == false) {
is_valid = false;
}
}
if (NULL == this->bagmetadata_p) {
} else {
bool ret = this->bagmetadata_p->validate( log );
if (ret == false) {
is_valid = false;
}
}
/*
if (NULL == this->fetchfile_p) {
} else {
bool ret = this->fetchfile_p->validate( log );
if (ret == false) {
is_valid = false;
}
}
if (NULL == this->othertags_p) {
} else {
bool ret = this->othertags_p->validate();
if (ret == false) {
is_valid = false;
}
}
*/
if (NULL == this->othertags_p) {
} else {
bool ret = this->othertags_p->validate( log);
if (ret == false) {
is_valid = false;
}
}
*/
return is_valid;
return is_valid;
}
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab


+ 7
- 0
src/test/test_bag.cpp View File

@ -11,6 +11,13 @@ int main(int argc, const char *argv[]) {
}
const string dir = argv[1];
Bag bag( dir );
list<string> log;
bool is_valid = bag.validate(log);
cout << "The bag '"<< dir <<"' is " << (is_valid ? "valid" : "invalid") << endl;
list<string>::const_iterator iterator;
for (iterator = log.begin(); iterator != log.end(); ++iterator) {
std::cout << *iterator << endl;
}
return 0;
}
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab


Loading…
Cancel
Save