C++ library for handling BagIt structures. BagIt is a standard format for creating transfer packages for digital preservation purposes. See https://en.wikipedia.org/wiki/BagIt for details. http://andreas-romeyke.de
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

166 lines
6.4 KiB

  1. #include "manifest.hpp"
  2. #include <iostream>
  3. #include <fstream>
  4. #include <sstream>
  5. #include <boost/filesystem.hpp>
  6. //#include <filesystem> // c++17
  7. //namespace fs = std::filesystem;
  8. namespace fs = boost::filesystem;
  9. using namespace std;
  10. Manifest::Manifest( string basedir, string file_prefix ) {
  11. Manifest::basedir = basedir;
  12. Manifest::base_manifest_file_prefix = file_prefix;
  13. map<checksum_algorithms, string> possible_manifest_files;
  14. Manifest::exist_manifest_files = false;
  15. for (checksum_algorithms alg : checksum_algorithmsList) {
  16. possible_manifest_files[alg] = basedir + Manifest::base_manifest_file_prefix + string_of_algorithm( alg ) + ".txt";
  17. }
  18. for (auto & possible_manifest_file : possible_manifest_files) {
  19. // debug
  20. // log << it->first << " => " << it->second << endl;
  21. // test if file exists
  22. //string filename = basedir + it->second;
  23. string filename = possible_manifest_file.second;
  24. checksum_algorithms alg = possible_manifest_file.first;
  25. fs::path p{ filename };
  26. fs::file_status s = fs::status( p );
  27. this->log << "path "<< p.string() << endl;
  28. this->log << "is file: "<< (fs::is_regular_file( s)?"yes":"no") << endl;
  29. if (fs::is_regular_file( s)) {
  30. Manifest::manifest_algorithm_files[alg] = filename;
  31. Manifest::exist_manifest_files = true;
  32. // debug
  33. //log << "TAGMANIFEST: alg="<< it->first << " => filename=" << it->second << endl;
  34. //log << "TAGMANIFEST2: alg="<< it->first << " => filename=" << filename << endl;
  35. }
  36. }
  37. //log << "TAGMANIFEST constructor, calling debug()" << endl;
  38. //this->debug();
  39. //log << "TAGMANIFEST constructor, finished" << endl;
  40. }
  41. multimap<checksum_string_t,filename_t> Manifest::get_checksum_file_pairs(checksum_algorithms alg) {
  42. string filename = this->manifest_algorithm_files[ alg ];
  43. multimap<checksum_string_t,filename_t> checksum_file_pairs;
  44. // log << "using file " << filename << endl;
  45. ifstream file;
  46. file.open( filename );
  47. if (file.is_open()) {
  48. string line;
  49. while (getline(file, line)) {
  50. stringstream line_ss ( line );
  51. string checksum;
  52. string subfile;
  53. line_ss >> checksum;
  54. line_ss >> subfile;
  55. checksum_file_pairs.insert( pair<checksum_string_t, filename_t>(checksum,subfile) );
  56. // log << "checksum="<<checksum<<" file="<<subfile<<endl;
  57. }
  58. file.close();
  59. }
  60. return checksum_file_pairs;
  61. }
  62. bool Manifest::validate() {
  63. Checksum checksum;
  64. bool is_valid = true;
  65. bool at_least_one_manifest = false;
  66. this->log << "validate exist manifestfiles = '"<< (this->exist_manifest_files?"yes":"no") <<endl;
  67. this->log << "validate count = '"<< (this->manifest_algorithm_files.size()) <<endl;
  68. for (auto & manifest_algorithm_file : this->manifest_algorithm_files) {
  69. string manifest_file = manifest_algorithm_file.second;
  70. checksum_algorithms alg = manifest_algorithm_file.first;
  71. fs::path p{ manifest_file };
  72. if (fs::is_regular_file( p )) {
  73. at_least_one_manifest=true;
  74. }
  75. this->log << "validate using file '"<< (manifest_file) <<endl;
  76. multimap<checksum_string_t,filename_t>checksum_file_pairs = this->get_checksum_file_pairs( alg );
  77. for (auto & checksum_file_pair : checksum_file_pairs) {
  78. string expected_checksum = checksum_file_pair.first;
  79. string file = this->basedir + checksum_file_pair.second;
  80. string calc_checksum = checksum.checksum_of_file( file, alg );
  81. if (calc_checksum.empty()) {
  82. this->log << "Bagit file '" << file << "', checksum '" << expected_checksum << "' is expected by file '" << manifest_file << "', but file was not found" << endl;
  83. is_valid = false;
  84. }
  85. else if (0 != expected_checksum.compare( calc_checksum)) {
  86. this->log << "Bagit file '" << file << "', checksum '" << expected_checksum << "' is expected by file '" << manifest_file << "', but found: '" << calc_checksum << "'" << endl;
  87. is_valid = false;
  88. }
  89. fs::path p{ file };
  90. // fs::file_status s = fs::status( p );
  91. if (! fs::is_regular_file( p )) {
  92. this->log << "Bagit file '" << file << "' does not exists in '" << this->basedir << "'" << endl;
  93. }
  94. }
  95. }
  96. if (! at_least_one_manifest) { is_valid = false; } // at least one manifest is needed!
  97. log << "MANIFESTLOG:" << this->log.str() << endl;
  98. return is_valid;
  99. }
  100. list<string> Manifest::get_checksummed_files() {
  101. list<string> files;
  102. for (auto & manifest_algorithm_file : this->manifest_algorithm_files) {
  103. multimap<checksum_string_t,filename_t>checksum_file_pairs = this->get_checksum_file_pairs( manifest_algorithm_file.first );
  104. for (auto & checksum_file_pair : checksum_file_pairs) {
  105. string file = this->basedir + checksum_file_pair.second;
  106. // log << "MF: '" << file << "'" << endl;
  107. files.push_back( file );
  108. }
  109. }
  110. return files;
  111. }
  112. bool Manifest::store( const string& basedir, list<string>& files ) {
  113. map<checksum_algorithms, string> possible_manifest_files;
  114. for (checksum_algorithms alg : checksum_algorithmsList) {
  115. possible_manifest_files[alg] = Manifest::base_manifest_file_prefix + string_of_algorithm( alg ) + ".txt";
  116. }
  117. for (auto file : files) {
  118. log << "DEBUG manifest-files file='" << file << "'" << endl;
  119. }
  120. Checksum checksum;
  121. for (auto & possible_manifest_file : possible_manifest_files) {
  122. checksum_algorithms alg = possible_manifest_file.first;
  123. log << "DEBUG manifest::store alg=" << string_of_algorithm(alg) << endl;
  124. if (!possible_manifest_file.second.empty()) {
  125. // test if file exists
  126. string algfilename = basedir + possible_manifest_file.second;
  127. log << "DEBUG manifest::store, alg=" << string_of_algorithm(alg) << " file='"<<algfilename << "' (newbase='" << basedir << "', filename='" << possible_manifest_file.second << "'" <<endl;
  128. ofstream alg_txt_file;
  129. alg_txt_file.open( algfilename );
  130. if (alg_txt_file.is_open()) {
  131. list<string>::iterator ch;
  132. for (ch=files.begin(); ch!=files.end(); ++ch) {
  133. string filename = basedir + *ch;
  134. log << "DEBUG manifest::store\tfilename='"<<filename<<"'"<<endl;
  135. string checksum_string = checksum.checksum_of_file( filename, alg);
  136. alg_txt_file << checksum_string << " " << *ch << endl;
  137. }
  138. alg_txt_file.close();
  139. }
  140. }
  141. }
  142. return true;
  143. }
  144. void Manifest::get_logstream( stringstream & log ) {
  145. log << this->log.rdbuf();
  146. }
  147. void Manifest::reset_logstream() {
  148. this->log.str(std::string());
  149. }
  150. // vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab