C++ library to handle BagIt structures. BagIt is a standard format for creating transfer packages for digital preservation purposes. See https://en.wikipedia.org/wiki/BagIt for details. http://andreas-romeyke.de
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

182 lines
7.1KB

  1. // Copyright (C) 2018 Andreas Romeyke (art1@andreas-romeyke.de), 2018.
  2. //
  3. // This program is free software: you can redistribute it and/or modify
  4. // it under the terms of the GNU General Public License as published by
  5. // the Free Software Foundation, either version 3 of the License, or
  6. // (at your option) any later version.
  7. //
  8. // This program is distributed in the hope that it will be useful,
  9. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. // GNU General Public License for more details.
  12. //
  13. // You should have received a copy of the GNU General Public License
  14. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  15. #include "manifest.hpp"
  16. #include <iostream>
  17. #include <fstream>
  18. #include <sstream>
  19. #include <boost/filesystem.hpp>
  20. //#include <filesystem> // c++17
  21. //namespace fs = std::filesystem;
  22. namespace fs = boost::filesystem;
  23. using namespace std;
  24. Manifest::Manifest( const string basedir, const string file_prefix ) {
  25. Manifest::basedir = basedir;
  26. Manifest::base_manifest_file_prefix = file_prefix;
  27. map<checksum_algorithms, string> possible_manifest_files;
  28. Manifest::exist_manifest_files = false;
  29. for (checksum_algorithms alg : checksum_algorithmsList) {
  30. possible_manifest_files[alg] = basedir + Manifest::base_manifest_file_prefix + string_of_algorithm( alg ) + ".txt";
  31. }
  32. for (auto & possible_manifest_file : possible_manifest_files) {
  33. // debug
  34. // log << it->first << " => " << it->second << endl;
  35. // test if file exists
  36. //string filename = basedir + it->second;
  37. string filename = possible_manifest_file.second;
  38. checksum_algorithms alg = possible_manifest_file.first;
  39. fs::path p{ filename };
  40. fs::file_status s = fs::status( p );
  41. this->log << "path "<< p.string() << endl;
  42. this->log << "is file: "<< (fs::is_regular_file( s)?"yes":"no") << endl;
  43. if (fs::is_regular_file( s)) {
  44. Manifest::manifest_algorithm_files[alg] = filename;
  45. Manifest::exist_manifest_files = true;
  46. // debug
  47. //log << "TAGMANIFEST: alg="<< it->first << " => filename=" << it->second << endl;
  48. //log << "TAGMANIFEST2: alg="<< it->first << " => filename=" << filename << endl;
  49. }
  50. }
  51. //log << "TAGMANIFEST constructor, calling debug()" << endl;
  52. //this->debug();
  53. //log << "TAGMANIFEST constructor, finished" << endl;
  54. }
  55. multimap<checksum_string_t,filename_t> Manifest::get_checksum_file_pairs(checksum_algorithms alg) {
  56. string filename = this->manifest_algorithm_files[ alg ];
  57. multimap<checksum_string_t,filename_t> checksum_file_pairs;
  58. // log << "using file " << filename << endl;
  59. ifstream file;
  60. file.open( filename );
  61. if (file.is_open()) {
  62. string line;
  63. while (getline(file, line)) {
  64. stringstream line_ss ( line );
  65. string checksum;
  66. string subfile;
  67. line_ss >> checksum;
  68. line_ss >> subfile;
  69. checksum_file_pairs.insert( pair<checksum_string_t, filename_t>(checksum,subfile) );
  70. // log << "checksum="<<checksum<<" file="<<subfile<<endl;
  71. }
  72. file.close();
  73. }
  74. return checksum_file_pairs;
  75. }
  76. bool Manifest::validate() {
  77. Checksum checksum;
  78. bool is_valid = true;
  79. bool at_least_one_manifest = false;
  80. this->log << "validate exist manifestfiles = '"<< (this->exist_manifest_files?"yes":"no") <<endl;
  81. this->log << "validate count = '"<< (this->manifest_algorithm_files.size()) <<endl;
  82. for (auto & manifest_algorithm_file : this->manifest_algorithm_files) {
  83. string manifest_file = manifest_algorithm_file.second;
  84. checksum_algorithms alg = manifest_algorithm_file.first;
  85. fs::path p{ manifest_file };
  86. if (fs::is_regular_file( p )) {
  87. at_least_one_manifest=true;
  88. }
  89. this->log << "validate using file '"<< (manifest_file) <<endl;
  90. multimap<checksum_string_t,filename_t>checksum_file_pairs = this->get_checksum_file_pairs( alg );
  91. for (auto & checksum_file_pair : checksum_file_pairs) {
  92. string expected_checksum = checksum_file_pair.first;
  93. string file = this->basedir + checksum_file_pair.second;
  94. string calc_checksum = checksum.checksum_of_file( file, alg );
  95. if (calc_checksum.empty()) {
  96. this->log << "Bagit file '" << file << "', checksum '" << expected_checksum << "' is expected by file '" << manifest_file << "', but file was not found" << endl;
  97. is_valid = false;
  98. }
  99. else if (0 != expected_checksum.compare( calc_checksum)) {
  100. this->log << "Bagit file '" << file << "', checksum '" << expected_checksum << "' is expected by file '" << manifest_file << "', but found: '" << calc_checksum << "'" << endl;
  101. is_valid = false;
  102. }
  103. fs::path p{ file };
  104. // fs::file_status s = fs::status( p );
  105. if (! fs::is_regular_file( p )) {
  106. this->log << "Bagit file '" << file << "' does not exists in '" << this->basedir << "'" << endl;
  107. }
  108. }
  109. }
  110. if (! at_least_one_manifest) { is_valid = false; } // at least one manifest is needed!
  111. log << "MANIFESTLOG:" << this->log.str() << endl;
  112. return is_valid;
  113. }
  114. list<string> Manifest::get_checksummed_files() {
  115. list<string> files;
  116. for (auto & manifest_algorithm_file : this->manifest_algorithm_files) {
  117. multimap<checksum_string_t,filename_t>checksum_file_pairs = this->get_checksum_file_pairs( manifest_algorithm_file.first );
  118. for (auto & checksum_file_pair : checksum_file_pairs) {
  119. string file = this->basedir + checksum_file_pair.second;
  120. // log << "MF: '" << file << "'" << endl;
  121. files.push_back( file );
  122. }
  123. }
  124. files.sort();
  125. files.unique();
  126. return files;
  127. }
  128. bool Manifest::store( const string& basedir, list<string>& files ) {
  129. map<checksum_algorithms, string> possible_manifest_files;
  130. for (checksum_algorithms alg : checksum_algorithmsList) {
  131. possible_manifest_files[alg] = Manifest::base_manifest_file_prefix + string_of_algorithm( alg ) + ".txt";
  132. }
  133. for (auto file : files) {
  134. log << "DEBUG manifest-files file='" << file << "'" << endl;
  135. }
  136. Checksum checksum;
  137. for (auto & possible_manifest_file : possible_manifest_files) {
  138. checksum_algorithms alg = possible_manifest_file.first;
  139. log << "DEBUG manifest::store alg=" << string_of_algorithm(alg) << endl;
  140. if (!possible_manifest_file.second.empty()) {
  141. // test if file exists
  142. string algfilename = basedir + possible_manifest_file.second;
  143. log << "DEBUG manifest::store, alg=" << string_of_algorithm(alg) << " file='"<<algfilename << "' (newbase='" << basedir << "', filename='" << possible_manifest_file.second << "'" <<endl;
  144. ofstream alg_txt_file;
  145. alg_txt_file.open( algfilename );
  146. if (alg_txt_file.is_open()) {
  147. list<string>::iterator ch;
  148. for (ch=files.begin(); ch!=files.end(); ++ch) {
  149. string filename = basedir + *ch;
  150. log << "DEBUG manifest::store\tfilename='"<<filename<<"'"<<endl;
  151. string checksum_string = checksum.checksum_of_file( filename, alg);
  152. alg_txt_file << checksum_string << " " << *ch << endl;
  153. }
  154. alg_txt_file.close();
  155. }
  156. }
  157. }
  158. return true;
  159. }
  160. void Manifest::get_logstream( stringstream & log ) {
  161. log << this->log.rdbuf();
  162. }
  163. void Manifest::reset_logstream() {
  164. this->log.str(std::string());
  165. }
  166. // vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab