C++ library to handle BagIt structures. BagIt is a standard format for creating transfer packages for digital preservation purposes. See https://en.wikipedia.org/wiki/BagIt for details. Author homepage: http://andreas-romeyke.de
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

113 lines
3.3 KiB

#include "bagmetadata.hpp"
#include <fstream>
#include <iostream>
#include <limits>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <tr1/regex>
#include <boost/filesystem.hpp>
//#include <filesystem> // c++17
//namespace fs = std::filesystem;
namespace fs = boost::filesystem;
using namespace std;
Bagmetadata::Bagmetadata( string basedir ) {
// test if file exists
string filename = basedir + "bag-info.txt";
fs::path p{ filename };
fs::file_status s = fs::status( p );
cout << "path "<< p.string() << endl;
cout << "is file: "<< fs::is_regular_file( s) << endl;
if (fs::is_regular_file( s)) {
this->exist_bagmetadata_file = true;
// map entries
// from spec: A metadata element MUST consist of a label, a colon, and a value,
// each separated by optional whitespace
// Long values may be continued
// onto the next line by inserting a newline (LF), a carriage return
// (CR), or carriage return plus newline (CRLF) and indenting the next
// line with linear white space (spaces or tabs)
ifstream file;
file.open( filename );
if (file.is_open()) {
string content(
(istreambuf_iterator<char>(file)),
istreambuf_iterator<char>()
);
file.close();
// parse content using regex based split
regex rgx_line("\\n(?=\\S)"); // entry should not start with whitespace
regex key_value_separator(":\\s*");
// DEBUG: string repl = "|";
// DEBUG: string content2 = regex_replace(content, rgx, "|");
// DEBUG: cout << "REPL:(" << content2 << ")" << endl;
sregex_token_iterator iter_line(content.begin(),
content.end(),
rgx_line,
-1);
sregex_token_iterator endline;
for ( ; iter_line != endline; ++iter_line) {
// DEBUG: std::cout << "ENTRY(" << *iter_line << ")" << endl;
string line = *iter_line;
sregex_token_iterator iter(line.begin(),
line.end(),
key_value_separator,
-1);
string key;
string value;
// TODO: add check for results!
key = *(iter);
value = (*++iter);
Bagmetadata::metadata[ key ] = value;
// DEBUG:
cout << "KEY="<<key<<" value="<<value<<endl;
}
}
} else { // no file
this->exist_bagmetadata_file = false;
}
}
// Returns a copy of the label/value pairs held in the metadata map.
// Changes made to the returned map do not affect this object.
map <string,string> Bagmetadata::get_metadata() {
    map<string, string> snapshot( this->metadata );
    return snapshot;
}
// Validation hook for the bag metadata.
// No checks are implemented at present: `log` is not written to and the
// result is always true.
bool Bagmetadata::validate(list<string> & log ) {
    return true;
}
// Reports whether the metadata map contains a "Payload-Oxum" entry.
// When the entry is present its value is also printed to stdout.
bool Bagmetadata::has_oxum() {
    const auto entry = this->metadata.find("Payload-Oxum");
    const bool present = (entry != this->metadata.end());
    if (present) {
        cout << "Oxum is: " << entry->second << endl;
    }
    return present;
}
oxum_t Bagmetadata::get_oxum() {
oxum_t oxum;
oxum.octetcount=0;
oxum.streamcount=0;
map<string, string>::iterator it;
it = this->metadata.find("Payload-Oxum");
if (it != this->metadata.end()) {
// split by .
stringstream oxumstring(it->second);
string soctets;
string sstreams;
getline(oxumstring, soctets, '.');
getline(oxumstring, sstreams, '.');
oxum.octetcount = stoi( soctets );
oxum.streamcount = stoi( sstreams );
}
return oxum;
}
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab