Perl module that lets you use DROID/PRONOM signatures and convert them to Perl regular expressions, analyze files (using wxHexEditor tags to display the matches), and calculate statistics. For PRONOM, see https://www.nationalarchives.gov.uk/PRONOM/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

112 lines
4.9 KiB

  1. #!/usr/bin/perl -w
  2. use strict;
  3. use warnings;
  4. use diagnostics;
  5. use Test::More tests => 45;
  6. use Test::Exception;
  ### tests

  # Load the module under test at compile time; the unqualified calls to
  # and_combine(), or_combine() etc. further down rely on whatever use_ok()
  # imports into this package.
  BEGIN { use_ok('File::FormatIdentification::Regex'); }

  # --- and_combine ---------------------------------------------------------

  # Two bare anchors are expected to be joined directly, without lookaheads.
  is(
      and_combine( q{^}, q{$} ),
      q{^$},
      q{and_combine('^', '$')},
  );

  # Lookahead-based conjunction, example taken from
  # https://stackoverflow.com/questions/869809/combine-regexp#870506
  is(
      and_combine( q{^abc}, q{xyz$} ),
      q{(?=^abc)(?=.*xyz$)},
      q{and_combine('^abc', 'xyz$')},
  );

  # Overlapping patterns are expected in plain lookahead form.  The stricter
  # expectation stays disabled because it is unclear whether it is correct:
  #   is(and_combine('abc', '.b.'), 'abc', "and_combine('abc', '.b.')");
  is(
      and_combine( q{abc}, q{.b.} ),
      q{(?=abc)(?=.b.)},
      q{and_combine('abc', '.b.')},
  );

  # Patterns that cannot match together are expected to be combined anyway.
  # The exception-based check stays disabled because it is unclear whether
  # this case should be detected at all:
  #   throws_ok( sub{and_combine('abc', 'xyz')}, qr(not combineable), "and_combine('abc', 'xyz') does not work");
  is(
      and_combine( q{foo}, q{bar} ),
      q{(?=foo)(?=bar)},
      q{and_combine('foo', 'bar')},
  );
  is(
      and_combine( q{foo}, q{bar}, q{baz} ),
      q{(?=foo)(?=bar)(?=baz)},
      q{and_combine('foo', 'bar', 'baz')},
  );

  # --- or_combine ----------------------------------------------------------

  # Regex::Assemble changes the order of the alternatives, so the naive
  # expectation does not hold:
  #   is(or_combine('foo', 'bar'), '(?:foo|bar)', "or_combine('foo', 'bar')");
  is(
      or_combine( q{foo}, q{bar} ),
      q{(?:bar|foo)},
      q{or_combine('foo', 'bar')},
  );
  is(
      or_combine( q{foo}, q{bar}, q{baz} ),
      q{(?:ba[rz]|foo)},
      q{or_combine('foo', 'bar', 'baz')},
  );
  ###
  use File::FormatIdentification::Regex
    qw( hex_replace_from_bracket hex_replace_to_bracket );

  # Conversion between the two hex-escape spellings, \xHH and \x{HH}, checked
  # in both directions.  The fixtures are single-quoted, so the functions
  # receive the backslash sequences as literal text, not as NUL bytes.
  {
      my $plain     = '\x00' x 5;      # \x00\x00\x00\x00\x00
      my $bracketed = '\x{00}' x 5;    # \x{00}\x{00}\x{00}\x{00}\x{00}

      is(
          hex_replace_to_bracket($plain),
          $bracketed,
          sprintf( q{hex_replace_to_bracket('%s')}, $plain ),
      );
      is(
          hex_replace_from_bracket($bracketed),
          $plain,
          sprintf( q{hex_replace_from_bracket('%s')}, $bracketed ),
      );
  }
  ###
  # peep_hole_optimizer, nested groups: patterns without redundancy are
  # expected back unchanged, and a group that only wraps another group is
  # expected to lose one pair of parentheses.
  # Each entry is [ input pattern, expected result ].
  for my $case (
      [ 'foo',              'foo' ],
      [ '^foo',             '^foo' ],
      [ '^(foo)',           '^(foo)' ],
      [ '^((foo))',         '^(foo)' ],
      [ '^((foo)|(bar))',   '^((foo)|(bar))' ],
      [ '^(((foo)|(bar)))', '^((foo)|(bar))' ],
      [ '^(((foo))|(bar))', '^((foo)|(bar))' ],
      [ '^((foo)|((bar)))', '^((foo)|(bar))' ],
    )
  {
      my ( $pattern, $optimized ) = @{$case};

      # The test name repeats the call exactly as it is made.
      my $name = sprintf q{peep_hole_optimizer('%s')}, $pattern;
      is( peep_hole_optimizer($pattern), $optimized, $name );
  }
  # peep_hole_optimizer, alternations: a common prefix or suffix of the
  # alternatives is expected to be factored out, while hex escapes such as
  # \x42 are expected to stay intact.
  # Each entry is [ input pattern, expected result ].
  for my $case (
      [ '(bar|baz)',     '(ba(r|z))' ],
      [ '(\x42|\x43)',   '(\x42|\x43)' ],
      [ '(\x34|\x44)',   '(\x34|\x44)' ],
      [ '(\x344|\x444)', '(\x344|\x444)' ],
      [ '((bar)|(baz))', '(ba(r|z))' ],
      [ '(barf|bazaar)', '(ba(rf|zaar))' ],
      [ '(raf|saf)',     '((r|s)af)' ],
      [ '(braf|asaf)',   '((br|as)af)' ],
      [ '(rag|saf)',     '(rag|saf)' ],
    )
  {
      my ( $pattern, $optimized ) = @{$case};

      # The test name repeats the call exactly as it is made.
      my $name = sprintf q{peep_hole_optimizer('%s')}, $pattern;
      is( peep_hole_optimizer($pattern), $optimized, $name );
  }
  # peep_hole_optimizer, repetitions: runs of the same character, hex escape
  # or substring are expected to collapse into {n} quantifiers, and an
  # existing quantifier is expected to be left alone.
  # Each entry is [ input pattern, expected result ].
  #
  # Test names are derived from the input so that name and input cannot
  # disagree (the hand-written name of the twelve-"o" case previously showed
  # a shorter string than the one actually tested).
  for my $case (
      [ 'barbara',                'barbara' eq 'barbara' ? '(bar){2}a' : () ],
      [ 'toooor',                 'to{4}r' ],
      [ 't' . ( 'o' x 12 ) . 'r', 'to{12}r' ],
      [ '\x00\x00\x00\x00\x00',   '\x00{5}' ],
      [
          '\A(\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xC2)',
          '\A(\x00{12}\xC2)'
      ],
      [ '\x0000000007006\x20',    '\x000{7}7006\x20' ],
      [ 'rhabarbarbarabarbara',   'rha(bar){3}a(bar){2}a' ],
      [ 'a{100000}',              'a{100000}' ],
    )
  {
      my ( $pattern, $optimized ) = @{$case};

      # Build the name before the call so it always shows the original input.
      my $name = sprintf q{peep_hole_optimizer('%s')}, $pattern;
      is( peep_hole_optimizer($pattern), $optimized, $name );
  }
  ###
  # calc_quality: expected quality score for a set of sample patterns.
  # Each entry is [ input pattern, expected score ].
  #
  # is() compares as strings, so these expectations only hold if
  # calc_quality() returns values that stringify to exactly three decimals
  # (presumably it rounds; verify against the module).
  #
  # Test names are derived from the input; the hand-written names of the
  # 'fo{2}' and 'fo{2,}' cases were missing their closing parenthesis.
  for my $case (
      [ '^',         0 ],
      [ 'foo',       1.098 ],
      [ 'fo{2}',     1.098 ],
      [ 'fo{2,}',    1.098 ],
      [ '^foo',      1.098 ],
      [ '[fo]o',     -0.405 ],
      [ '[^fo]o',    -4.848 ],
      [ '.o',        -4.855 ],
      [ 'foobarbaz', 2.197 ],
      [ '.........', -5.545 ],
    )
  {
      my ( $pattern, $score ) = @{$case};

      # Build the name before the call so it always shows the original input.
      my $name = sprintf q{calc_quality('%s')}, $pattern;
      is( calc_quality($pattern), $score, $name );
  }