C++ library for handling BagIt structures. BagIt is a standard format for creating transfer packages for digital preservation purposes. See https://en.wikipedia.org/wiki/BagIt for details. http://andreas-romeyke.de
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

391 lines
12 KiB

  1. // Copyright (C) 2018 Andreas Romeyke (art1@andreas-romeyke.de), 2018.
  2. //
  3. // This program is free software: you can redistribute it and/or modify
  4. // it under the terms of the GNU General Public License as published by
  5. // the Free Software Foundation, either version 3 of the License, or
  6. // (at your option) any later version.
  7. //
  8. // This program is distributed in the hope that it will be useful,
  9. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. // GNU General Public License for more details.
  12. //
  13. // You should have received a copy of the GNU General Public License
  14. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  15. #include "bagmetadata.hpp"
  16. #include <iostream>
  17. #include <fstream>
  18. #include <regex>
  19. #include <sstream>
  20. #include <string>
  21. #include <tr1/regex>
  22. #include <boost/filesystem.hpp>
  23. //#include <filesystem> // c++17
  24. //namespace fs = std::filesystem;
  25. namespace fs = boost::filesystem;
  26. using namespace std;
  27. Bagmetadata::Bagmetadata( string basedir ) {
  28. // test if file exists
  29. string filename = basedir + "bag-info.txt";
  30. fs::path p{ filename };
  31. fs::file_status s = fs::status( p );
  32. log << "path "<< p.string() << endl;
  33. log << "is file: "<< fs::is_regular_file( s) << endl;
  34. if (fs::is_regular_file( s)) {
  35. this->exist_bagmetadata_file = true;
  36. // map entries
  37. // from spec: A metadata element MUST consist of a label, a colon, and a value,
  38. // each separated by optional whitespace
  39. // Long values may be continued
  40. // onto the next line by inserting a newline (LF), a carriage return
  41. // (CR), or carriage return plus newline (CRLF) and indenting the next
  42. // line with linear white space (spaces or tabs)
  43. ifstream file;
  44. file.open( filename );
  45. if (file.is_open()) {
  46. string content(
  47. (istreambuf_iterator<char>(file)),
  48. istreambuf_iterator<char>()
  49. );
  50. file.close();
  51. // parse content using regex based split
  52. regex rgx_line(R"(\n(?=\S))"); // entry should not start with whitespace
  53. regex key_value_separator(R"(:\s*)");
  54. // DEBUG: string repl = "|";
  55. // DEBUG: string content2 = regex_replace(content, rgx, "|");
  56. // DEBUG: log << "REPL:(" << content2 << ")" << endl;
  57. sregex_token_iterator iter_line(content.begin(),
  58. content.end(),
  59. rgx_line,
  60. -1);
  61. sregex_token_iterator endline;
  62. for ( ; iter_line != endline; ++iter_line) {
  63. // DEBUG: std::log << "ENTRY(" << *iter_line << ")" << endl;
  64. string line = *iter_line;
  65. sregex_token_iterator iter(line.begin(),
  66. line.end(),
  67. key_value_separator,
  68. -1);
  69. string key;
  70. string value;
  71. // TODO(art1): add check for results!
  72. key = *(iter);
  73. value = (*++iter);
  74. // clean \n at the end of a multiline value
  75. if (!value.empty())
  76. {
  77. if (value[value.length()-1] == '\n') {
  78. value.erase(value.length()-1);
  79. }
  80. }
  81. Bagmetadata::metadata[ key ] = value;
  82. // DEBUG:
  83. log << "KEY='"<<key<<"' value='"<<value << "'" <<endl;
  84. }
  85. }
  86. } else { // no file
  87. this->exist_bagmetadata_file = false;
  88. }
  89. }
  90. bool Bagmetadata::has_metadata() {
  91. return this->exist_bagmetadata_file;
  92. }
  93. map <string,string> Bagmetadata::get_metadata() {
  94. return this->metadata;
  95. }
  96. bool Bagmetadata::validate() {
  97. bool is_valid = true;
  98. return is_valid;
  99. }
  100. bool Bagmetadata::has_PayloadOxum() {
  101. map<string, string>::iterator it;
  102. it = this->metadata.find("Payload-Oxum");
  103. if (it != this->metadata.end()) {
  104. log << "Oxum is: " << it->second << endl;
  105. return true;
  106. }
  107. return false;
  108. }
  109. oxum_t Bagmetadata::get_PayloadOxum() {
  110. oxum_t oxum{};
  111. oxum.octetcount=0;
  112. oxum.streamcount=0;
  113. map<string, string>::iterator it;
  114. it = this->metadata.find("Payload-Oxum");
  115. if (it != this->metadata.end()) {
  116. // split by .
  117. stringstream oxumstring(it->second);
  118. string soctets;
  119. string sstreams;
  120. getline(oxumstring, soctets, '.');
  121. getline(oxumstring, sstreams, '.');
  122. oxum.octetcount = stoi( soctets );
  123. oxum.streamcount = stoi( sstreams );
  124. }
  125. return oxum;
  126. }
  127. void Bagmetadata::set_PayloadOxum( oxum_t oxum ) {
  128. this->metadata[ "Payload-Oxum" ] = to_string( oxum.octetcount ) + "." + to_string( oxum.streamcount );
  129. }
  130. bool Bagmetadata::store( string basedir ) {
  131. fs::path p{ basedir };
  132. fs::file_status s = fs::status( p );
  133. if (! fs::is_directory( s)) {
  134. Bagmetadata::log << "directory '" << basedir << "' does not exist" << endl;
  135. return false;
  136. }
  137. string filename = basedir + "bag-info.txt";
  138. ofstream baginfo_txt_file;
  139. baginfo_txt_file.open( filename );
  140. if (baginfo_txt_file.is_open()) {
  141. for (auto & it : this->metadata) {
  142. baginfo_txt_file << (it.first) << " : " << (it.second) << endl;
  143. }
  144. baginfo_txt_file.close();
  145. }
  146. return true;
  147. }
  148. void Bagmetadata::get_logstream( stringstream & log ) {
  149. log << this->log.rdbuf();
  150. }
  151. ////////
  152. bool Bagmetadata::has_SourceOrganization() {
  153. map<string, string>::iterator it;
  154. it = this->metadata.find("SourceOrganization");
  155. if(it != this->metadata.end()) { return true; }
  156. return false;
  157. }
  158. bool Bagmetadata::has_OrganizationAddress() {
  159. map<string, string>::iterator it;
  160. it = this->metadata.find("OrganizationAddress");
  161. if(it != this->metadata.end()) { return true; }
  162. return false;
  163. }
  164. bool Bagmetadata::has_ContactName() {
  165. map<string, string>::iterator it;
  166. it = this->metadata.find("ContactName");
  167. if(it != this->metadata.end()) { return true; }
  168. return false;
  169. }
  170. bool Bagmetadata::has_ContactPhone() {
  171. map<string, string>::iterator it;
  172. it = this->metadata.find("ContactPhone");
  173. if(it != this->metadata.end()) { return true; }
  174. return false;
  175. }
  176. bool Bagmetadata::has_ContactEmail() {
  177. map<string, string>::iterator it;
  178. it = this->metadata.find("ContactEmail");
  179. if(it != this->metadata.end()) { return true; }
  180. return false;
  181. }
  182. bool Bagmetadata::has_ExternalDescription() {
  183. map<string, string>::iterator it;
  184. it = this->metadata.find("ExternalDescription");
  185. if(it != this->metadata.end()) { return true; }
  186. return false;
  187. }
  188. bool Bagmetadata::has_BaggingDate() {
  189. map<string, string>::iterator it;
  190. it = this->metadata.find("BaggingDate");
  191. if(it != this->metadata.end()) { return true; }
  192. return false;
  193. }
  194. bool Bagmetadata::has_ExternalIdentifier() {
  195. map<string, string>::iterator it;
  196. it = this->metadata.find("ExternalIdentifier");
  197. if(it != this->metadata.end()) { return true; }
  198. return false;
  199. }
  200. bool Bagmetadata::has_BagSize() {
  201. map<string, string>::iterator it;
  202. it = this->metadata.find("BagSize");
  203. if(it != this->metadata.end()) { return true; }
  204. return false;
  205. }
  206. bool Bagmetadata::has_BagGroupIdentifier() {
  207. map<string, string>::iterator it;
  208. it = this->metadata.find("BagGroupIdentifier");
  209. if(it != this->metadata.end()) { return true; }
  210. return false;
  211. }
  212. bool Bagmetadata::has_BagCount() {
  213. map<string, string>::iterator it;
  214. it = this->metadata.find("BagCount");
  215. if(it != this->metadata.end()) { return true; }
  216. return false;
  217. }
  218. bool Bagmetadata::has_InternalSenderIdentifier() {
  219. map<string, string>::iterator it;
  220. it = this->metadata.find("InternalSenderIdentifier");
  221. if(it != this->metadata.end()) { return true; }
  222. return false;
  223. }
  224. bool Bagmetadata::has_InternalSenderDescription() {
  225. map<string, string>::iterator it;
  226. it = this->metadata.find("InternalSenderDescription");
  227. if(it != this->metadata.end()) { return true; }
  228. return false;
  229. }
  230. string Bagmetadata::get_SourceOrganization() {
  231. map<string, string>::iterator it;
  232. it = this->metadata.find("SourceOrganization");
  233. if(it != this->metadata.end()) { return it->second; }
  234. return "";
  235. }
  236. string Bagmetadata::get_OrganizationAddress() {
  237. map<string, string>::iterator it;
  238. it = this->metadata.find("OrganizationAddress");
  239. if(it != this->metadata.end()) { return it->second; }
  240. return "";
  241. }
  242. string Bagmetadata::get_ContactName() {
  243. map<string, string>::iterator it;
  244. it = this->metadata.find("ContactName");
  245. if(it != this->metadata.end()) { return it->second; }
  246. return "";
  247. }
  248. string Bagmetadata::get_ContactPhone() {
  249. map<string, string>::iterator it;
  250. it = this->metadata.find("ContactPhone");
  251. if(it != this->metadata.end()) { return it->second; }
  252. return "";
  253. }
  254. string Bagmetadata::get_ContactEmail() {
  255. map<string, string>::iterator it;
  256. it = this->metadata.find("ContactEmail");
  257. if(it != this->metadata.end()) { return it->second; }
  258. return "";
  259. }
  260. string Bagmetadata::get_ExternalDescription() {
  261. map<string, string>::iterator it;
  262. it = this->metadata.find("ExternalDescription");
  263. if(it != this->metadata.end()) { return it->second; }
  264. return "";
  265. }
  266. string Bagmetadata::get_BaggingDate() {
  267. map<string, string>::iterator it;
  268. it = this->metadata.find("BaggingDate");
  269. if(it != this->metadata.end()) { return it->second; }
  270. return "";
  271. }
  272. string Bagmetadata::get_ExternalIdentifier() {
  273. map<string, string>::iterator it;
  274. it = this->metadata.find("ExternalIdentifier");
  275. if(it != this->metadata.end()) { return it->second; }
  276. return "";
  277. }
  278. string Bagmetadata::get_BagSize() {
  279. map<string, string>::iterator it;
  280. it = this->metadata.find("BagSize");
  281. if(it != this->metadata.end()) { return it->second; }
  282. return "";
  283. }
  284. string Bagmetadata::get_BagGroupIdentifier() {
  285. map<string, string>::iterator it;
  286. it = this->metadata.find("BagGroupIdentifier");
  287. if(it != this->metadata.end()) { return it->second; }
  288. return "";
  289. }
  290. string Bagmetadata::get_BagCount() {
  291. map<string, string>::iterator it;
  292. it = this->metadata.find("BagCount");
  293. if(it != this->metadata.end()) { return it->second; }
  294. return "";
  295. }
  296. string Bagmetadata::get_InternalSenderIdentifier() {
  297. map<string, string>::iterator it;
  298. it = this->metadata.find("InternalSenderIdentifier");
  299. if(it != this->metadata.end()) { return it->second; }
  300. return "";
  301. }
  302. string Bagmetadata::get_InternalSenderDescription() {
  303. map<string, string>::iterator it;
  304. it = this->metadata.find("InternalSenderDescription");
  305. if(it != this->metadata.end()) { return it->second; }
  306. log <<"ITD: NOT FOUND"<< endl;
  307. return "NOTFOUND";
  308. }
  309. void Bagmetadata::set_SourceOrganization(string SourceOrganization) {
  310. this->metadata["SourceOrganization"] = SourceOrganization;
  311. }
  312. void Bagmetadata::set_OrganizationAddress(string OrganizationAddress) {
  313. this->metadata["OrganizationAddress"] = OrganizationAddress;
  314. }
  315. void Bagmetadata::set_ContactName(string ContactName) {
  316. this->metadata["ContactName"] = ContactName;
  317. }
  318. void Bagmetadata::set_ContactPhone(string ContactPhone) {
  319. this->metadata["ContactPhone"] = ContactPhone;
  320. }
  321. void Bagmetadata::set_ContactEmail(string ContactEmail) {
  322. this->metadata["ContactEmail"] = ContactEmail;
  323. }
  324. void Bagmetadata::set_ExternalDescription(string ExternalDescription) {
  325. this->metadata["ExternalDescription"] = ExternalDescription;
  326. }
  327. void Bagmetadata::set_BaggingDate(string BaggingDate) {
  328. this->metadata["BaggingDate"] = BaggingDate;
  329. }
  330. void Bagmetadata::set_ExternalIdentifier(string ExternalIdentifier) {
  331. this->metadata["ExternalIdentifier"] = ExternalIdentifier;
  332. }
  333. void Bagmetadata::set_BagSize(string BagSize) {
  334. this->metadata["BagSize"] = BagSize;
  335. }
  336. void Bagmetadata::set_BagGroupIdentifier(string BagGroupIdentifier) {
  337. this->metadata["BagGroupIdentifier"] = BagGroupIdentifier;
  338. }
  339. void Bagmetadata::set_BagCount(string BagCount) {
  340. this->metadata["BagCount"] = BagCount;
  341. }
  342. void Bagmetadata::set_InternalSenderIdentifier(string InternalSenderIdentifier) {
  343. this->metadata["InternalSenderIdentifier"] = InternalSenderIdentifier;
  344. }
  345. void Bagmetadata::set_InternalSenderDescription(string InternalSenderDescription) {
  346. this->metadata["InternalSenderDescription"] = InternalSenderDescription;
  347. }
  348. ////////
  349. void Bagmetadata::reset_logstream() {
  350. this->log.str(std::string());
  351. }
  352. // vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab