// C++ library to handle BagIt structures. BagIt is a standard format for creating transfer packages for digital preservation purposes. See https://en.wikipedia.org/wiki/BagIt for details. http://andreas-romeyke.de
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

83 lines
2.5 KiB

#include "bagmetadata.hpp"
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <regex>
#include <tr1/regex>
#include <boost/filesystem.hpp>
//#include <filesystem> // c++17
//namespace fs = std::filesystem;
namespace fs = boost::filesystem;
using namespace std;
/// Removes leading and trailing blanks, tabs, CR and LF from @p text.
static std::string bagmetadata_trim( const std::string &text ) {
    static const char whitespace[] = " \t\r\n";
    const std::string::size_type first = text.find_first_not_of( whitespace );
    if (first == std::string::npos) {
        return std::string();
    }
    const std::string::size_type last = text.find_last_not_of( whitespace );
    return text.substr( first, last - first + 1 );
}

/// Splits one metadata entry into its label and its value.
///
/// The entry is cut at the FIRST colon only, so values that contain colons
/// themselves (URLs, timestamps, ...) are kept intact. Whitespace around the
/// label and around the value is removed; the inner line breaks of a
/// continued value are left untouched.
///
/// @param entry  raw text of one entry (may span several lines)
/// @param key    receives the label
/// @param value  receives the value
/// @return false if the entry has no colon or an empty label; @p key and
///         @p value are not modified in that case
static bool bagmetadata_split_entry( const std::string &entry, std::string &key, std::string &value ) {
    const std::string::size_type colon = entry.find( ':' );
    if (colon == std::string::npos) {
        return false;
    }
    const std::string label = bagmetadata_trim( entry.substr( 0, colon ) );
    if (label.empty()) {
        return false;
    }
    key = label;
    value = bagmetadata_trim( entry.substr( colon + 1 ) );
    return true;
}

/// Looks for "bag-info.txt" inside @p basedir and, if it is a regular file,
/// loads its label/value pairs into the metadata map.
///
/// exist_bagmetadata_file records whether the file was found. Entries that
/// cannot be split into label and value (empty lines, lines without a colon)
/// are skipped. If a label occurs more than once, the last value wins.
///
/// @param basedir directory of the bag, with or without a trailing separator
Bagmetadata::Bagmetadata( string basedir ) {
    // join via the path API so that a missing trailing separator in basedir
    // does not yield a name like "<basedir>bag-info.txt"
    const fs::path p = fs::path( basedir ) / "bag-info.txt";
    const fs::file_status s = fs::status( p );
    std::cout << "path "<< p.string() << std::endl;
    std::cout << "is file: "<< fs::is_regular_file( s) << std::endl;
    this->exist_bagmetadata_file = fs::is_regular_file( s );
    if (!this->exist_bagmetadata_file) { // no file
        return;
    }
    std::ifstream file( p.string() );
    if (!file.is_open()) {
        return;
    }
    // read the whole file at once, continuation lines need multi-line handling
    const std::string content(
        (std::istreambuf_iterator<char>(file)),
        std::istreambuf_iterator<char>()
    );
    file.close();
    // from spec: A metadata element MUST consist of a label, a colon, and a value,
    // each separated by optional whitespace.
    // Long values may be continued onto the next line by inserting a newline (LF),
    // a carriage return (CR), or carriage return plus newline (CRLF) and indenting
    // the next line with linear white space (spaces or tabs).
    // => a new entry starts only at a line break that is followed by non-whitespace
    const std::regex rgx_line( "\\n(?=\\S)" );
    std::sregex_token_iterator iter_line( content.begin(), content.end(), rgx_line, -1 );
    const std::sregex_token_iterator endline;
    for ( ; iter_line != endline; ++iter_line) {
        const std::string entry = *iter_line;
        std::string key;
        std::string value;
        if (bagmetadata_split_entry( entry, key, value )) {
            this->metadata[ key ] = value;
        }
    }
}
/// Returns a copy of the label/value pairs held in the metadata member.
map <string,string> Bagmetadata::get_metadata() {
    map <string,string> snapshot( metadata );
    return snapshot;
}
/// Placeholder: no checks are implemented yet, every call reports success.
bool Bagmetadata::validate() {
    // TODO
    const bool is_valid = true;
    return is_valid;
}
// vim: set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab