C++ library to handle BagIt structures. BagIt is a standard format for creating transfer packages for digital preservation purposes. See https://en.wikipedia.org/wiki/BagIt for details. http://andreas-romeyke.de
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

377 lines
11 KiB

  #include "bagmetadata.hpp"

  #include <fstream>
  #include <iostream>
  #include <regex>
  #include <sstream>
  #include <stdexcept>
  #include <string>
  #include <tr1/regex>

  #include <boost/filesystem.hpp>
  9. //#include <filesystem> // c++17
  10. //namespace fs = std::filesystem;
  11. namespace fs = boost::filesystem;
  12. using namespace std;
  13. Bagmetadata::Bagmetadata( string basedir ) {
  14. // test if file exists
  15. string filename = basedir + "bag-info.txt";
  16. fs::path p{ filename };
  17. fs::file_status s = fs::status( p );
  18. log << "path "<< p.string() << endl;
  19. log << "is file: "<< fs::is_regular_file( s) << endl;
  20. if (fs::is_regular_file( s)) {
  21. this->exist_bagmetadata_file = true;
  22. // map entries
  23. // from spec: A metadata element MUST consist of a label, a colon, and a value,
  24. // each separated by optional whitespace
  25. // Long values may be continued
  26. // onto the next line by inserting a newline (LF), a carriage return
  27. // (CR), or carriage return plus newline (CRLF) and indenting the next
  28. // line with linear white space (spaces or tabs)
  29. ifstream file;
  30. file.open( filename );
  31. if (file.is_open()) {
  32. string content(
  33. (istreambuf_iterator<char>(file)),
  34. istreambuf_iterator<char>()
  35. );
  36. file.close();
  37. // parse content using regex based split
  38. regex rgx_line(R"(\n(?=\S))"); // entry should not start with whitespace
  39. regex key_value_separator(R"(:\s*)");
  40. // DEBUG: string repl = "|";
  41. // DEBUG: string content2 = regex_replace(content, rgx, "|");
  42. // DEBUG: log << "REPL:(" << content2 << ")" << endl;
  43. sregex_token_iterator iter_line(content.begin(),
  44. content.end(),
  45. rgx_line,
  46. -1);
  47. sregex_token_iterator endline;
  48. for ( ; iter_line != endline; ++iter_line) {
  49. // DEBUG: std::log << "ENTRY(" << *iter_line << ")" << endl;
  50. string line = *iter_line;
  51. sregex_token_iterator iter(line.begin(),
  52. line.end(),
  53. key_value_separator,
  54. -1);
  55. string key;
  56. string value;
  57. // TODO(art1): add check for results!
  58. key = *(iter);
  59. value = (*++iter);
  60. // clean \n at the end of a multiline value
  61. if (!value.empty())
  62. {
  63. if (value[value.length()-1] == '\n') {
  64. value.erase(value.length()-1);
  65. }
  66. }
  67. Bagmetadata::metadata[ key ] = value;
  68. // DEBUG:
  69. log << "KEY='"<<key<<"' value='"<<value << "'" <<endl;
  70. }
  71. }
  72. } else { // no file
  73. this->exist_bagmetadata_file = false;
  74. }
  75. }
  76. bool Bagmetadata::has_metadata() {
  77. return this->exist_bagmetadata_file;
  78. }
  79. map <string,string> Bagmetadata::get_metadata() {
  80. return this->metadata;
  81. }
  82. bool Bagmetadata::validate() {
  83. bool is_valid = true;
  84. return is_valid;
  85. }
  86. bool Bagmetadata::has_PayloadOxum() {
  87. map<string, string>::iterator it;
  88. it = this->metadata.find("Payload-Oxum");
  89. if (it != this->metadata.end()) {
  90. log << "Oxum is: " << it->second << endl;
  91. return true;
  92. }
  93. return false;
  94. }
  95. oxum_t Bagmetadata::get_PayloadOxum() {
  96. oxum_t oxum{};
  97. oxum.octetcount=0;
  98. oxum.streamcount=0;
  99. map<string, string>::iterator it;
  100. it = this->metadata.find("Payload-Oxum");
  101. if (it != this->metadata.end()) {
  102. // split by .
  103. stringstream oxumstring(it->second);
  104. string soctets;
  105. string sstreams;
  106. getline(oxumstring, soctets, '.');
  107. getline(oxumstring, sstreams, '.');
  108. oxum.octetcount = stoi( soctets );
  109. oxum.streamcount = stoi( sstreams );
  110. }
  111. return oxum;
  112. }
  113. void Bagmetadata::set_PayloadOxum( oxum_t oxum ) {
  114. this->metadata[ "Payload-Oxum" ] = to_string( oxum.octetcount ) + "." + to_string( oxum.streamcount );
  115. }
  116. bool Bagmetadata::store( string basedir ) {
  117. fs::path p{ basedir };
  118. fs::file_status s = fs::status( p );
  119. if (! fs::is_directory( s)) {
  120. Bagmetadata::log << "directory '" << basedir << "' does not exist" << endl;
  121. return false;
  122. }
  123. string filename = basedir + "bag-info.txt";
  124. ofstream baginfo_txt_file;
  125. baginfo_txt_file.open( filename );
  126. if (baginfo_txt_file.is_open()) {
  127. for (auto & it : this->metadata) {
  128. baginfo_txt_file << (it.first) << " : " << (it.second) << endl;
  129. }
  130. baginfo_txt_file.close();
  131. }
  132. return true;
  133. }
  134. void Bagmetadata::get_logstream( stringstream & log ) {
  135. log << this->log.rdbuf();
  136. }
  137. ////////
  138. bool Bagmetadata::has_SourceOrganization() {
  139. map<string, string>::iterator it;
  140. it = this->metadata.find("SourceOrganization");
  141. if(it != this->metadata.end()) { return true; }
  142. return false;
  143. }
  144. bool Bagmetadata::has_OrganizationAddress() {
  145. map<string, string>::iterator it;
  146. it = this->metadata.find("OrganizationAddress");
  147. if(it != this->metadata.end()) { return true; }
  148. return false;
  149. }
  150. bool Bagmetadata::has_ContactName() {
  151. map<string, string>::iterator it;
  152. it = this->metadata.find("ContactName");
  153. if(it != this->metadata.end()) { return true; }
  154. return false;
  155. }
  156. bool Bagmetadata::has_ContactPhone() {
  157. map<string, string>::iterator it;
  158. it = this->metadata.find("ContactPhone");
  159. if(it != this->metadata.end()) { return true; }
  160. return false;
  161. }
  162. bool Bagmetadata::has_ContactEmail() {
  163. map<string, string>::iterator it;
  164. it = this->metadata.find("ContactEmail");
  165. if(it != this->metadata.end()) { return true; }
  166. return false;
  167. }
  168. bool Bagmetadata::has_ExternalDescription() {
  169. map<string, string>::iterator it;
  170. it = this->metadata.find("ExternalDescription");
  171. if(it != this->metadata.end()) { return true; }
  172. return false;
  173. }
  174. bool Bagmetadata::has_BaggingDate() {
  175. map<string, string>::iterator it;
  176. it = this->metadata.find("BaggingDate");
  177. if(it != this->metadata.end()) { return true; }
  178. return false;
  179. }
  180. bool Bagmetadata::has_ExternalIdentifier() {
  181. map<string, string>::iterator it;
  182. it = this->metadata.find("ExternalIdentifier");
  183. if(it != this->metadata.end()) { return true; }
  184. return false;
  185. }
  186. bool Bagmetadata::has_BagSize() {
  187. map<string, string>::iterator it;
  188. it = this->metadata.find("BagSize");
  189. if(it != this->metadata.end()) { return true; }
  190. return false;
  191. }
  192. bool Bagmetadata::has_BagGroupIdentifier() {
  193. map<string, string>::iterator it;
  194. it = this->metadata.find("BagGroupIdentifier");
  195. if(it != this->metadata.end()) { return true; }
  196. return false;
  197. }
  198. bool Bagmetadata::has_BagCount() {
  199. map<string, string>::iterator it;
  200. it = this->metadata.find("BagCount");
  201. if(it != this->metadata.end()) { return true; }
  202. return false;
  203. }
  204. bool Bagmetadata::has_InternalSenderIdentifier() {
  205. map<string, string>::iterator it;
  206. it = this->metadata.find("InternalSenderIdentifier");
  207. if(it != this->metadata.end()) { return true; }
  208. return false;
  209. }
  210. bool Bagmetadata::has_InternalSenderDescription() {
  211. map<string, string>::iterator it;
  212. it = this->metadata.find("InternalSenderDescription");
  213. if(it != this->metadata.end()) { return true; }
  214. return false;
  215. }
  216. string Bagmetadata::get_SourceOrganization() {
  217. map<string, string>::iterator it;
  218. it = this->metadata.find("SourceOrganization");
  219. if(it != this->metadata.end()) { return it->second; }
  220. return "";
  221. }
  222. string Bagmetadata::get_OrganizationAddress() {
  223. map<string, string>::iterator it;
  224. it = this->metadata.find("OrganizationAddress");
  225. if(it != this->metadata.end()) { return it->second; }
  226. return "";
  227. }
  228. string Bagmetadata::get_ContactName() {
  229. map<string, string>::iterator it;
  230. it = this->metadata.find("ContactName");
  231. if(it != this->metadata.end()) { return it->second; }
  232. return "";
  233. }
  234. string Bagmetadata::get_ContactPhone() {
  235. map<string, string>::iterator it;
  236. it = this->metadata.find("ContactPhone");
  237. if(it != this->metadata.end()) { return it->second; }
  238. return "";
  239. }
  240. string Bagmetadata::get_ContactEmail() {
  241. map<string, string>::iterator it;
  242. it = this->metadata.find("ContactEmail");
  243. if(it != this->metadata.end()) { return it->second; }
  244. return "";
  245. }
  246. string Bagmetadata::get_ExternalDescription() {
  247. map<string, string>::iterator it;
  248. it = this->metadata.find("ExternalDescription");
  249. if(it != this->metadata.end()) { return it->second; }
  250. return "";
  251. }
  252. string Bagmetadata::get_BaggingDate() {
  253. map<string, string>::iterator it;
  254. it = this->metadata.find("BaggingDate");
  255. if(it != this->metadata.end()) { return it->second; }
  256. return "";
  257. }
  258. string Bagmetadata::get_ExternalIdentifier() {
  259. map<string, string>::iterator it;
  260. it = this->metadata.find("ExternalIdentifier");
  261. if(it != this->metadata.end()) { return it->second; }
  262. return "";
  263. }
  264. string Bagmetadata::get_BagSize() {
  265. map<string, string>::iterator it;
  266. it = this->metadata.find("BagSize");
  267. if(it != this->metadata.end()) { return it->second; }
  268. return "";
  269. }
  270. string Bagmetadata::get_BagGroupIdentifier() {
  271. map<string, string>::iterator it;
  272. it = this->metadata.find("BagGroupIdentifier");
  273. if(it != this->metadata.end()) { return it->second; }
  274. return "";
  275. }
  276. string Bagmetadata::get_BagCount() {
  277. map<string, string>::iterator it;
  278. it = this->metadata.find("BagCount");
  279. if(it != this->metadata.end()) { return it->second; }
  280. return "";
  281. }
  282. string Bagmetadata::get_InternalSenderIdentifier() {
  283. map<string, string>::iterator it;
  284. it = this->metadata.find("InternalSenderIdentifier");
  285. if(it != this->metadata.end()) { return it->second; }
  286. return "";
  287. }
  288. string Bagmetadata::get_InternalSenderDescription() {
  289. map<string, string>::iterator it;
  290. it = this->metadata.find("InternalSenderDescription");
  291. if(it != this->metadata.end()) { return it->second; }
  292. log <<"ITD: NOT FOUND"<< endl;
  293. return "NOTFOUND";
  294. }
  295. void Bagmetadata::set_SourceOrganization(string SourceOrganization) {
  296. this->metadata["SourceOrganization"] = SourceOrganization;
  297. }
  298. void Bagmetadata::set_OrganizationAddress(string OrganizationAddress) {
  299. this->metadata["OrganizationAddress"] = OrganizationAddress;
  300. }
  301. void Bagmetadata::set_ContactName(string ContactName) {
  302. this->metadata["ContactName"] = ContactName;
  303. }
  304. void Bagmetadata::set_ContactPhone(string ContactPhone) {
  305. this->metadata["ContactPhone"] = ContactPhone;
  306. }
  307. void Bagmetadata::set_ContactEmail(string ContactEmail) {
  308. this->metadata["ContactEmail"] = ContactEmail;
  309. }
  310. void Bagmetadata::set_ExternalDescription(string ExternalDescription) {
  311. this->metadata["ExternalDescription"] = ExternalDescription;
  312. }
  313. void Bagmetadata::set_BaggingDate(string BaggingDate) {
  314. this->metadata["BaggingDate"] = BaggingDate;
  315. }
  316. void Bagmetadata::set_ExternalIdentifier(string ExternalIdentifier) {
  317. this->metadata["ExternalIdentifier"] = ExternalIdentifier;
  318. }
  319. void Bagmetadata::set_BagSize(string BagSize) {
  320. this->metadata["BagSize"] = BagSize;
  321. }
  322. void Bagmetadata::set_BagGroupIdentifier(string BagGroupIdentifier) {
  323. this->metadata["BagGroupIdentifier"] = BagGroupIdentifier;
  324. }
  325. void Bagmetadata::set_BagCount(string BagCount) {
  326. this->metadata["BagCount"] = BagCount;
  327. }
  328. void Bagmetadata::set_InternalSenderIdentifier(string InternalSenderIdentifier) {
  329. this->metadata["InternalSenderIdentifier"] = InternalSenderIdentifier;
  330. }
  331. void Bagmetadata::set_InternalSenderDescription(string InternalSenderDescription) {
  332. this->metadata["InternalSenderDescription"] = InternalSenderDescription;
  333. }
  334. ////////
  335. void Bagmetadata::reset_logstream() {
  336. this->log.str(std::string());
  337. }
  338. // vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab