Browse Source

- print_csv_statistics() expects a file now

- fixed column formatting in print_csv_statistics(); each value is now enclosed in double quotes ("")
- added command line options to switch between standard report and CSV creation
- added progress bar
master
parent
commit
4016dd06c8
2 changed files with 50 additions and 10 deletions
  1. +41
    -7
      bin/pronom_statistics.pl
  2. +9
    -3
      lib/File/FormatIdentification/Pronom.pm

+ 41
- 7
bin/pronom_statistics.pl View File

@@ -19,17 +19,51 @@ use strict;
use warnings 'FATAL';
use utf8;
use v5.24;
use Carp;
use Getopt::Long;
use Term::ProgressBar;
use File::FormatIdentification::Pronom;

################################################################################
# main
################################################################################

my $pronomfile = shift @ARGV;
if ( !defined $pronomfile ) {
say "you need at least a pronom signature file";
# Command line state, filled in by GetOptions below.
my $csv_file;             # target of --csvfile; stays undef for the standard report
my $verbose;              # set by --verbose
my $progress_flag = 1;    # negatable option: --noprogress switches it off
GetOptions (
    "csvfile=s" => \$csv_file,
    "verbose" => \$verbose,
    "progress!" => \$progress_flag,
    "help" => sub {
        say "$0 [--csvfile=FILE] [--verbose] [--noprogress] droid_signature_filename1 [.. droid_signature_filenameN]";
        say "$0 --help ";
        say "";
        say "--csvfile=FILE .............. creates a CSV file to store statistics";
        say "--verbose ................... enables more verbose output in standard report";
        say "--noprogress ................ disables progress bar";
        say "droid_signature_filename..... DROID signature files (container filess not supported yet)";
        # Stop after showing the usage text; otherwise the script would
        # fall through and start processing signature files.
        exit 0;
    }
) or croak "wrong option, try '$0 --help'";
# Bail out early if the requested CSV file is already present.
if ((defined $csv_file) && (-e $csv_file)) {
    croak "CSV file '$csv_file' already exist";
}
# Everything left in @ARGV after option parsing is treated as a DROID
# signature file.
my $file_count = scalar @ARGV;    # element count; $#ARGV would be one less
if ( $file_count == 0 ) {
    say "you need at least a pronom signature file";
    exit 1;
}
say "using $file_count signature files";
# Build the progress bar only when it is wanted, so --noprogress does not
# create one at all.
my $progress;
if ($progress_flag) {
    $progress = Term::ProgressBar->new( $file_count );
}
my $files_done = 0;
# Iterate over the list itself so the last file is not skipped.
for my $pronomfile (@ARGV) {
    my $pronom = File::FormatIdentification::Pronom->new(
        "droid_signature_filename" => $pronomfile
    );
    # --csvfile selects CSV output; otherwise print the standard report.
    if (defined $csv_file) {
        $pronom->print_csv_statistics( $csv_file );
    } else {
        $pronom->print_statistics( $verbose );
    }
    $files_done++;
    if ($progress_flag) {
        # Report the number of files finished, so the bar reaches its
        # target after the last file.
        $progress->update($files_done);
    }
}
my $pronom = File::FormatIdentification::Pronom->new(
"droid_signature_filename" => $pronomfile );
$pronom->print_csv_statistics();
1;

+ 9
- 3
lib/File/FormatIdentification/Pronom.pm View File

@@ -826,6 +826,7 @@ sub _prepare_statistics {

sub print_csv_statistics {
my $self = shift;
my $csv_file = shift;
my $results = $self->_prepare_statistics();
my $version = $results->{filename};
$version =~ s/DROID_SignatureFile_V(\d+)\.xml/$1/;
@@ -847,15 +848,20 @@ sub print_csv_statistics {

my @headers =
qw(version filename count_of_puids count_of_internal_ids count_of_regular_expressions count_of_fileendings count_of_puid_with_fileendings_only count_of_orphaned_internal_ids count_of_multiple_used_regex best_quality_puid best_quality_internal_id best_quality_quality best_quality_combined_regex worst_quality_puid worst_quality_internal_id worst_quality_quality worst_quality_combined_regex);
say "#", join( ",", @headers );
say join(
my $file_exists = (-e $csv_file);
open (my $FH, ">>", "$csv_file") or croak "Can't open file '$csv_file', $0";
if (not $file_exists) {
say $FH "#", join( ",", @headers );
}
say $FH join(
",",
map {
my $result = $results->{$_};
if ( !defined $result ) { $result = ""; }
$result;
"\"$result\"";
} @headers
);
close ($FH);
}

sub print_statistics {

Loading…
Cancel
Save