C++ library to handle BagIt structures. BagIt is a standard format for creating transfer packages for digital preservation purposes. See https://en.wikipedia.org/wiki/BagIt for details. http://andreas-romeyke.de
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

418 lines
13 KiB

// Copyright (C) 2018 Andreas Romeyke (art1@andreas-romeyke.de), 2018.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
#include "bagmetadata.hpp"
#include <ctime>
#include <exception>
#include <fstream>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <tr1/regex>
#include <boost/filesystem.hpp>
//#include <filesystem> // c++17
//namespace fs = std::filesystem;
namespace fs = boost::filesystem;
using namespace std;
// Loads the bag metadata found in "<basedir>bag-info.txt".
//
// basedir is concatenated with the file name as-is, so it has to end with a
// path separator. If the resulting path is a regular file the object is
// marked as having a metadata file and every "label: value" entry of the file
// is stored in the metadata map; otherwise the object is marked as having no
// metadata file. Diagnostics are appended to the internal log stream.
//
// Parsing rules:
//  - an entry starts on a line whose first character is not whitespace;
//    indented lines continue the value of the previous entry,
//  - label and value are separated by the FIRST colon only, so values may
//    contain colons themselves (URNs, URLs, times),
//  - whitespace around the label and in front of the value is dropped, as are
//    the line terminators (LF / CRLF) at the end of the value,
//  - entries without a colon or with an empty label are skipped.
Bagmetadata::Bagmetadata( string basedir ) {
    const string filename = basedir + "bag-info.txt";
    const fs::path p{ filename };
    const fs::file_status s = fs::status( p );
    log << "path "<< p.string() << endl;
    log << "is file: "<< fs::is_regular_file( s) << endl;
    if (! fs::is_regular_file( s )) {
        this->exist_bagmetadata_file = false;
        return;
    }
    this->exist_bagmetadata_file = true;

    ifstream file( filename );
    if (! file.is_open()) {
        return;
    }
    const string content(
        (istreambuf_iterator<char>(file)),
        istreambuf_iterator<char>()
    );
    file.close();

    // split the file into entries: a newline followed by a non-whitespace
    // character ends an entry, a newline followed by whitespace does not
    const regex rgx_line(R"(\n(?=\S))");
    const char * const whitespace = " \t\r\n\f\v";
    const sregex_token_iterator endline;
    for (sregex_token_iterator iter_line(content.begin(), content.end(), rgx_line, -1);
         iter_line != endline;
         ++iter_line) {
        const string entry = *iter_line;

        const auto colon = entry.find(':');
        if (colon == string::npos) {
            // blank chunks (e.g. an empty file) are ignored silently
            if (entry.find_first_not_of(whitespace) != string::npos) {
                log << "skipping entry without ':' separator: '" << entry << "'" << endl;
            }
            continue;
        }

        // label: everything before the first colon, without surrounding whitespace
        const auto key_first = entry.find_first_not_of(whitespace);
        if (key_first == colon) {
            log << "skipping entry with empty label: '" << entry << "'" << endl;
            continue;
        }
        const auto key_last = entry.find_last_not_of(whitespace, colon - 1);
        const string key = entry.substr(key_first, key_last - key_first + 1);

        // value: everything after the first colon
        string value = entry.substr(colon + 1);
        // erase(0, npos) empties a value that consists of whitespace only
        value.erase(0, value.find_first_not_of(whitespace));
        // drop trailing line terminators; npos + 1 wraps to 0 and empties the string
        value.erase(value.find_last_not_of("\r\n") + 1);

        this->metadata[ key ] = value;
        log << "KEY='"<<key<<"' value='"<<value << "'" <<endl;
    }
}
// Reports the flag recorded by the constructor: true when bag-info.txt was
// found as a regular file, false otherwise.
bool Bagmetadata::has_metadata() {
    const bool file_was_found = this->exist_bagmetadata_file;
    return file_was_found;
}
// Returns a copy of all metadata elements, keyed by label.
// Changes made to the returned map do not affect this object.
map <string,string> Bagmetadata::get_metadata() {
    map<string, string> snapshot = this->metadata;
    return snapshot;
}
// Validates the metadata.
// No checks are implemented yet, so the result is always true.
bool Bagmetadata::validate() {
    return true;
}
// Tells whether a "Payload-Oxum" element is present.
// When it is, its raw value is also written to the internal log stream.
bool Bagmetadata::has_PayloadOxum() {
    const auto entry = this->metadata.find("Payload-Oxum");
    if (entry == this->metadata.end()) {
        return false;
    }
    log << "Oxum is: " << entry->second << endl;
    return true;
}
// Parses the "Payload-Oxum" element ("<octet count>.<stream count>").
//
// Returns an oxum_t with both counts set to 0 when the element is missing.
// A value that cannot be parsed (no '.', non-numeric parts, numbers that do
// not fit) no longer raises an exception from stoul: the problem is written
// to the internal log stream and both counts stay 0, the same result as for
// a missing element.
oxum_t Bagmetadata::get_PayloadOxum() {
    oxum_t oxum{};
    oxum.octetcount=0;
    oxum.streamcount=0;

    const auto entry = this->metadata.find("Payload-Oxum");
    if (entry == this->metadata.end()) {
        return oxum;
    }

    // split "<octets>.<streams>" at the dots
    stringstream oxumstring(entry->second);
    string soctets;
    string sstreams;
    getline(oxumstring, soctets, '.');
    getline(oxumstring, sstreams, '.');

    try {
        // convert both parts before assigning, so a failure on the second
        // part does not leave a half-filled result behind
        const auto octets = stoul( soctets );
        const auto streams = stoul( sstreams );
        oxum.octetcount = octets;
        oxum.streamcount = streams;
    } catch (const std::exception &) {
        // stoul throws std::invalid_argument / std::out_of_range
        log << "malformed Payload-Oxum '" << entry->second << "'" << endl;
    }
    return oxum;
}
// Stores the oxum as "Payload-Oxum" in the form
// "<octet count>.<stream count>" and updates the bag size from the octet
// count via set_BagSize().
void Bagmetadata::set_PayloadOxum( oxum_t oxum ) {
    const string octets = to_string( oxum.octetcount );
    const string streams = to_string( oxum.streamcount );
    this->metadata[ "Payload-Oxum" ] = octets + "." + streams;
    this->set_BagSize( oxum.octetcount );
}
// Writes all metadata elements to "<basedir>bag-info.txt", one
// "label: value" line per element.
//
// basedir is concatenated with the file name as-is, so it has to end with a
// path separator. Returns false (and logs the reason) when basedir is not a
// directory, when the file cannot be opened, or when writing fails; returns
// true only after the file was written and closed successfully.
//
// The separator is ": " (colon directly after the label): a label followed by
// whitespace is not allowed by the BagIt specification and would be read back
// with a trailing blank by a parser that splits at the colon.
bool Bagmetadata::store( string basedir ) {
    const fs::path p{ basedir };
    if (! fs::is_directory( fs::status( p ) )) {
        Bagmetadata::log << "directory '" << basedir << "' does not exist" << endl;
        return false;
    }

    const string filename = basedir + "bag-info.txt";
    ofstream baginfo_txt_file( filename );
    if (! baginfo_txt_file.is_open()) {
        Bagmetadata::log << "could not open '" << filename << "' for writing" << endl;
        return false;
    }

    for (const auto & entry : this->metadata) {
        baginfo_txt_file << entry.first << ": " << entry.second << '\n';
    }

    // close() flushes; fail() afterwards covers both write and close errors
    baginfo_txt_file.close();
    if (baginfo_txt_file.fail()) {
        Bagmetadata::log << "could not write '" << filename << "'" << endl;
        return false;
    }
    return true;
}
// Appends the not yet delivered part of the internal log to the given stream.
//
// The text is transferred through the stream buffer, so it is consumed: a
// second call only delivers what has been logged in the meantime.
// Inserting a stream buffer that yields no characters sets failbit on the
// destination stream, which would make all later writes to the caller's
// stream fail. Therefore nothing is inserted when there is nothing to deliver.
void Bagmetadata::get_logstream( stringstream & log ) {
    auto * pending = this->log.rdbuf();
    // sgetc() looks at the next character without consuming it
    if (pending->sgetc() == std::char_traits<char>::eof()) {
        return;
    }
    log << pending;
}
////////
// Tells whether a source organization element is present.
// The key written by set_SourceOrganization ("SourceOrganization") is checked
// first; the hyphenated label reserved by the BagIt specification
// ("Source-Organization") is accepted as well, so elements read from a
// conforming bag-info.txt are found too.
bool Bagmetadata::has_SourceOrganization() {
    return this->metadata.count("SourceOrganization") > 0
        || this->metadata.count("Source-Organization") > 0;
}
// Tells whether an organization address element is present.
// The key written by set_OrganizationAddress ("OrganizationAddress") is
// checked first; the hyphenated label reserved by the BagIt specification
// ("Organization-Address") is accepted as well, so elements read from a
// conforming bag-info.txt are found too.
bool Bagmetadata::has_OrganizationAddress() {
    return this->metadata.count("OrganizationAddress") > 0
        || this->metadata.count("Organization-Address") > 0;
}
// Tells whether a contact name element is present.
// The key written by set_ContactName ("ContactName") is checked first; the
// hyphenated label reserved by the BagIt specification ("Contact-Name") is
// accepted as well, so elements read from a conforming bag-info.txt are
// found too.
bool Bagmetadata::has_ContactName() {
    return this->metadata.count("ContactName") > 0
        || this->metadata.count("Contact-Name") > 0;
}
// Tells whether a contact phone element is present.
// The key written by set_ContactPhone ("ContactPhone") is checked first; the
// hyphenated label reserved by the BagIt specification ("Contact-Phone") is
// accepted as well, so elements read from a conforming bag-info.txt are
// found too.
bool Bagmetadata::has_ContactPhone() {
    return this->metadata.count("ContactPhone") > 0
        || this->metadata.count("Contact-Phone") > 0;
}
// Tells whether a contact e-mail element is present.
// The key written by set_ContactEmail ("ContactEmail") is checked first; the
// hyphenated label reserved by the BagIt specification ("Contact-Email") is
// accepted as well, so elements read from a conforming bag-info.txt are
// found too.
bool Bagmetadata::has_ContactEmail() {
    return this->metadata.count("ContactEmail") > 0
        || this->metadata.count("Contact-Email") > 0;
}
// Tells whether an external description element is present.
// The key written by set_ExternalDescription ("ExternalDescription") is
// checked first; the hyphenated label reserved by the BagIt specification
// ("External-Description") is accepted as well, so elements read from a
// conforming bag-info.txt are found too.
bool Bagmetadata::has_ExternalDescription() {
    return this->metadata.count("ExternalDescription") > 0
        || this->metadata.count("External-Description") > 0;
}
// Tells whether a bagging date element is present.
// The key written by set_BaggingDate ("BaggingDate") is checked first; the
// hyphenated label reserved by the BagIt specification ("Bagging-Date") is
// accepted as well, so elements read from a conforming bag-info.txt are
// found too.
bool Bagmetadata::has_BaggingDate() {
    return this->metadata.count("BaggingDate") > 0
        || this->metadata.count("Bagging-Date") > 0;
}
// Tells whether an external identifier element is present.
// The key written by set_ExternalIdentifier ("ExternalIdentifier") is checked
// first; the hyphenated label reserved by the BagIt specification
// ("External-Identifier") is accepted as well, so elements read from a
// conforming bag-info.txt are found too.
bool Bagmetadata::has_ExternalIdentifier() {
    return this->metadata.count("ExternalIdentifier") > 0
        || this->metadata.count("External-Identifier") > 0;
}
// Tells whether a bag size element is present.
// The key written by set_BagSize ("BagSize") is checked first; the hyphenated
// label reserved by the BagIt specification ("Bag-Size") is accepted as well,
// so elements read from a conforming bag-info.txt are found too.
bool Bagmetadata::has_BagSize() {
    return this->metadata.count("BagSize") > 0
        || this->metadata.count("Bag-Size") > 0;
}
// Tells whether a bag group identifier element is present.
// The key written by set_BagGroupIdentifier ("BagGroupIdentifier") is checked
// first; the hyphenated label reserved by the BagIt specification
// ("Bag-Group-Identifier") is accepted as well, so elements read from a
// conforming bag-info.txt are found too.
bool Bagmetadata::has_BagGroupIdentifier() {
    return this->metadata.count("BagGroupIdentifier") > 0
        || this->metadata.count("Bag-Group-Identifier") > 0;
}
// Tells whether a bag count element is present.
// The key written by set_BagCount ("BagCount") is checked first; the
// hyphenated label reserved by the BagIt specification ("Bag-Count") is
// accepted as well, so elements read from a conforming bag-info.txt are
// found too.
bool Bagmetadata::has_BagCount() {
    return this->metadata.count("BagCount") > 0
        || this->metadata.count("Bag-Count") > 0;
}
// Tells whether an internal sender identifier element is present.
// The key written by set_InternalSenderIdentifier
// ("InternalSenderIdentifier") is checked first; the hyphenated label
// reserved by the BagIt specification ("Internal-Sender-Identifier") is
// accepted as well, so elements read from a conforming bag-info.txt are
// found too.
bool Bagmetadata::has_InternalSenderIdentifier() {
    return this->metadata.count("InternalSenderIdentifier") > 0
        || this->metadata.count("Internal-Sender-Identifier") > 0;
}
// Tells whether an internal sender description element is present.
// The key written by set_InternalSenderDescription
// ("InternalSenderDescription") is checked first; the hyphenated label
// reserved by the BagIt specification ("Internal-Sender-Description") is
// accepted as well, so elements read from a conforming bag-info.txt are
// found too.
bool Bagmetadata::has_InternalSenderDescription() {
    return this->metadata.count("InternalSenderDescription") > 0
        || this->metadata.count("Internal-Sender-Description") > 0;
}
// Returns the source organization, or "" when the element is missing.
// The key written by set_SourceOrganization ("SourceOrganization") wins; if
// it is absent the hyphenated label reserved by the BagIt specification
// ("Source-Organization") is used, so values read from a conforming
// bag-info.txt are returned too.
string Bagmetadata::get_SourceOrganization() {
    auto entry = this->metadata.find("SourceOrganization");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("Source-Organization");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    return "";
}
// Returns the organization address, or "" when the element is missing.
// The key written by set_OrganizationAddress ("OrganizationAddress") wins; if
// it is absent the hyphenated label reserved by the BagIt specification
// ("Organization-Address") is used, so values read from a conforming
// bag-info.txt are returned too.
string Bagmetadata::get_OrganizationAddress() {
    auto entry = this->metadata.find("OrganizationAddress");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("Organization-Address");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    return "";
}
// Returns the contact name, or "" when the element is missing.
// The key written by set_ContactName ("ContactName") wins; if it is absent
// the hyphenated label reserved by the BagIt specification ("Contact-Name")
// is used, so values read from a conforming bag-info.txt are returned too.
string Bagmetadata::get_ContactName() {
    auto entry = this->metadata.find("ContactName");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("Contact-Name");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    return "";
}
// Returns the contact phone number, or "" when the element is missing.
// The key written by set_ContactPhone ("ContactPhone") wins; if it is absent
// the hyphenated label reserved by the BagIt specification ("Contact-Phone")
// is used, so values read from a conforming bag-info.txt are returned too.
string Bagmetadata::get_ContactPhone() {
    auto entry = this->metadata.find("ContactPhone");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("Contact-Phone");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    return "";
}
// Returns the contact e-mail address, or "" when the element is missing.
// The key written by set_ContactEmail ("ContactEmail") wins; if it is absent
// the hyphenated label reserved by the BagIt specification ("Contact-Email")
// is used, so values read from a conforming bag-info.txt are returned too.
string Bagmetadata::get_ContactEmail() {
    auto entry = this->metadata.find("ContactEmail");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("Contact-Email");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    return "";
}
// Returns the external description, or "" when the element is missing.
// The key written by set_ExternalDescription ("ExternalDescription") wins; if
// it is absent the hyphenated label reserved by the BagIt specification
// ("External-Description") is used, so values read from a conforming
// bag-info.txt are returned too.
string Bagmetadata::get_ExternalDescription() {
    auto entry = this->metadata.find("ExternalDescription");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("External-Description");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    return "";
}
// Returns the bagging date exactly as stored, or "" when the element is
// missing.
// The key written by set_BaggingDate ("BaggingDate") wins; if it is absent
// the hyphenated label reserved by the BagIt specification ("Bagging-Date")
// is used, so values read from a conforming bag-info.txt are returned too.
// TODO: should return YYYY-MM-DD; the stored text is not checked or
// normalized here.
string Bagmetadata::get_BaggingDate() {
    auto entry = this->metadata.find("BaggingDate");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("Bagging-Date");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    return "";
}
// Returns the external identifier, or "" when the element is missing.
// The key written by set_ExternalIdentifier ("ExternalIdentifier") wins; if
// it is absent the hyphenated label reserved by the BagIt specification
// ("External-Identifier") is used, so values read from a conforming
// bag-info.txt are returned too.
string Bagmetadata::get_ExternalIdentifier() {
    auto entry = this->metadata.find("ExternalIdentifier");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("External-Identifier");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    return "";
}
// Returns the bag size text, or "" when the element is missing.
// The key written by set_BagSize ("BagSize") wins; if it is absent the
// hyphenated label reserved by the BagIt specification ("Bag-Size") is used,
// so values read from a conforming bag-info.txt are returned too.
string Bagmetadata::get_BagSize() {
    auto entry = this->metadata.find("BagSize");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("Bag-Size");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    return "";
}
// Returns the bag group identifier, or "" when the element is missing.
// The key written by set_BagGroupIdentifier ("BagGroupIdentifier") wins; if
// it is absent the hyphenated label reserved by the BagIt specification
// ("Bag-Group-Identifier") is used, so values read from a conforming
// bag-info.txt are returned too.
string Bagmetadata::get_BagGroupIdentifier() {
    auto entry = this->metadata.find("BagGroupIdentifier");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("Bag-Group-Identifier");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    return "";
}
// Returns the bag count exactly as stored, or "" when the element is missing.
// The key written by set_BagCount ("BagCount") wins; if it is absent the
// hyphenated label reserved by the BagIt specification ("Bag-Count") is used,
// so values read from a conforming bag-info.txt are returned too.
// TODO: return "N of T" or "N of ?"; the stored text is not checked here.
string Bagmetadata::get_BagCount() {
    auto entry = this->metadata.find("BagCount");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("Bag-Count");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    return "";
}
// Returns the internal sender identifier, or "" when the element is missing.
// The key written by set_InternalSenderIdentifier
// ("InternalSenderIdentifier") wins; if it is absent the hyphenated label
// reserved by the BagIt specification ("Internal-Sender-Identifier") is used,
// so values read from a conforming bag-info.txt are returned too.
string Bagmetadata::get_InternalSenderIdentifier() {
    auto entry = this->metadata.find("InternalSenderIdentifier");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("Internal-Sender-Identifier");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    return "";
}
// Returns the internal sender description.
// The key written by set_InternalSenderDescription
// ("InternalSenderDescription") wins; if it is absent the hyphenated label
// reserved by the BagIt specification ("Internal-Sender-Description") is used,
// so values read from a conforming bag-info.txt are returned too.
//
// Unlike the other getters, a missing element is logged and reported as the
// literal string "NOTFOUND" instead of "".
// NOTE(review): kept as-is because callers may compare against "NOTFOUND";
// confirm whether it should return "" like its siblings.
string Bagmetadata::get_InternalSenderDescription() {
    auto entry = this->metadata.find("InternalSenderDescription");
    if (entry == this->metadata.end()) {
        entry = this->metadata.find("Internal-Sender-Description");
    }
    if (entry != this->metadata.end()) {
        return entry->second;
    }
    log <<"ITD: NOT FOUND"<< endl;
    return "NOTFOUND";
}
// Stores the source organization under the key "SourceOrganization",
// replacing any previous value.
// NOTE(review): the BagIt specification spells this label "Source-Organization".
void Bagmetadata::set_SourceOrganization(string SourceOrganization) {
    auto & slot = this->metadata["SourceOrganization"];
    slot = SourceOrganization;
}
// Stores the organization address under the key "OrganizationAddress",
// replacing any previous value.
// NOTE(review): the BagIt specification spells this label "Organization-Address".
void Bagmetadata::set_OrganizationAddress(string OrganizationAddress) {
    auto & slot = this->metadata["OrganizationAddress"];
    slot = OrganizationAddress;
}
// Stores the contact name under the key "ContactName", replacing any
// previous value.
// NOTE(review): the BagIt specification spells this label "Contact-Name".
void Bagmetadata::set_ContactName(string ContactName) {
    auto & slot = this->metadata["ContactName"];
    slot = ContactName;
}
// Stores the contact phone number under the key "ContactPhone", replacing
// any previous value.
// NOTE(review): the BagIt specification spells this label "Contact-Phone".
void Bagmetadata::set_ContactPhone(string ContactPhone) {
    auto & slot = this->metadata["ContactPhone"];
    slot = ContactPhone;
}
// Stores the contact e-mail address under the key "ContactEmail", replacing
// any previous value.
// NOTE(review): the BagIt specification spells this label "Contact-Email".
void Bagmetadata::set_ContactEmail(string ContactEmail) {
    auto & slot = this->metadata["ContactEmail"];
    slot = ContactEmail;
}
// Stores the external description under the key "ExternalDescription",
// replacing any previous value.
// NOTE(review): the BagIt specification spells this label "External-Description".
void Bagmetadata::set_ExternalDescription(string ExternalDescription) {
    auto & slot = this->metadata["ExternalDescription"];
    slot = ExternalDescription;
}
// Stores the given text under the key "BaggingDate", replacing any previous
// value. The text is stored unchanged.
// TODO: set YYYY-MM-DD (the format of the given text is not checked yet)
// NOTE(review): the BagIt specification spells this label "Bagging-Date".
void Bagmetadata::set_BaggingDate(string BaggingDate) {
    auto & slot = this->metadata["BaggingDate"];
    slot = BaggingDate;
}
void Bagmetadata::set_BaggingDate( std::time_t BaggingDate ) {
char datestring[100];
std::strftime( datestring, sizeof(datestring), "%Y-%m-%d", std::localtime(&BaggingDate));
this->metadata["BaggingDate"] = string(datestring);
}
// Stores the external identifier under the key "ExternalIdentifier",
// replacing any previous value.
// NOTE(review): the BagIt specification spells this label "External-Identifier".
void Bagmetadata::set_ExternalIdentifier(string ExternalIdentifier) {
    auto & slot = this->metadata["ExternalIdentifier"];
    slot = ExternalIdentifier;
}
// Stores the given text under the key "BagSize", replacing any previous
// value. The text is stored unchanged.
// NOTE(review): the BagIt specification spells this label "Bag-Size".
void Bagmetadata::set_BagSize(string BagSize) {
    auto & slot = this->metadata["BagSize"];
    slot = BagSize;
}
// Stores a human readable size under the key "BagSize".
//
// The byte count is expressed in the largest unit (TB, GB, MB, kB; powers of
// 1024) that it reaches, using integer division, so the fraction is dropped.
// Counts below 1024 are written in bytes ("B").
// A count that is exactly one unit is reported in that unit (1024 -> "1 kB");
// the former strict comparison produced "1024 B" / "1024 kB" / ... there.
void Bagmetadata::set_BagSize( unsigned long long int bytes) {
    struct size_unit {
        unsigned long long factor;
        const char * suffix;
    };
    // ordered from largest to smallest so the first match is the best fit
    static const size_unit units[] = {
        { 1024ull * 1024ull * 1024ull * 1024ull, " TB" },
        { 1024ull * 1024ull * 1024ull,           " GB" },
        { 1024ull * 1024ull,                     " MB" },
        { 1024ull,                               " kB" },
    };
    for (const auto & unit : units) {
        if (bytes >= unit.factor) {
            this->metadata["BagSize"] = to_string( bytes / unit.factor ) + unit.suffix;
            return;
        }
    }
    this->metadata["BagSize"] = to_string( bytes ) + " B";
}
// Stores the bag group identifier under the key "BagGroupIdentifier",
// replacing any previous value.
// NOTE(review): the BagIt specification spells this label "Bag-Group-Identifier".
void Bagmetadata::set_BagGroupIdentifier(string BagGroupIdentifier) {
    auto & slot = this->metadata["BagGroupIdentifier"];
    slot = BagGroupIdentifier;
}
// Stores the given text under the key "BagCount", replacing any previous
// value. The text is stored unchanged.
// TODO: fix BagCount, needs "N of T" or "N of ?" where N, T is replaced by an int
// NOTE(review): the BagIt specification spells this label "Bag-Count".
void Bagmetadata::set_BagCount(string BagCount) {
    auto & slot = this->metadata["BagCount"];
    slot = BagCount;
}
// Stores the internal sender identifier under the key
// "InternalSenderIdentifier", replacing any previous value.
// NOTE(review): the BagIt specification spells this label
// "Internal-Sender-Identifier".
void Bagmetadata::set_InternalSenderIdentifier(string InternalSenderIdentifier) {
    auto & slot = this->metadata["InternalSenderIdentifier"];
    slot = InternalSenderIdentifier;
}
// Stores the internal sender description under the key
// "InternalSenderDescription", replacing any previous value.
// NOTE(review): the BagIt specification spells this label
// "Internal-Sender-Description".
void Bagmetadata::set_InternalSenderDescription(string InternalSenderDescription) {
    auto & slot = this->metadata["InternalSenderDescription"];
    slot = InternalSenderDescription;
}
////////
// Discards everything collected in the internal log stream by replacing its
// contents with an empty string. The stream's state flags are not touched.
void Bagmetadata::reset_logstream() {
    const std::string nothing;
    this->log.str( nothing );
}
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab