|
|
@ -1,5 +1,5 @@ |
|
|
|
package File::FormatIdentification::Regex; |
|
|
|
|
|
|
|
# helper module to combine and optimize regexes |
|
|
|
use 5.024001; |
|
|
|
use strict; |
|
|
|
use warnings; |
|
|
@ -87,7 +87,11 @@ sub hex_replace_from_bracket { |
|
|
|
|
|
|
|
sub peep_hole_optimizer ($) { |
|
|
|
my $regex = $_[0]; # only works if special Regexes within File::FormatIdentification:: used |
|
|
|
$regex = hex_replace_to_bracket($regex); |
|
|
|
|
|
|
|
#$regex = hex_replace_to_bracket($regex); |
|
|
|
if ($regex =~ m/\\x[0-9]+/) { |
|
|
|
confess "regex '$regex' has invalid \\x sequences, use \\x{} instead!"; |
|
|
|
} |
|
|
|
my $oldregex = $regex; |
|
|
|
##### first optimize bracket-groups |
|
|
|
my $subrg = |
|
|
@ -198,15 +202,11 @@ sub peep_hole_optimizer ($) { |
|
|
|
# say "Found in regex='$regex' sub='$sub' with matches=$matches"; |
|
|
|
# $regex =~ s#($subrg)\1{3,}(?!$subrg*\}#$sub\{$matches\}#; |
|
|
|
#} |
|
|
|
#### restore \x{ff} to \xff |
|
|
|
$regex = hex_replace_from_bracket($regex); |
|
|
|
if ( $regex =~ m#\\x0\{# ) { |
|
|
|
confess "wrong substitution of oldregex = \n\t'", $oldregex, |
|
|
|
"'\n -> \n\t'", $regex, "'"; |
|
|
|
} |
|
|
|
|
|
|
|
return $regex; |
|
|
|
} |
|
|
|
|
|
|
|
# calculate the quality of a regex: the more specific the regex, the higher its quality |
|
|
|
sub calc_quality ($) { |
|
|
|
my $regex = shift; |
|
|
|
|
|
|
|