#! /usr/bin/perl -w
#
# guess_encoding.pl -- guess charset encoding
#
# (C) 2007, jw@suse.de, Novell Inc.
# Distribute under GPLv2
#
# 2006-12-05, jw, V0.1 -- only framework.
# 2007-01-23, jw, V0.2 -- utf8 and latin1 for ttys.
# 2007-02-08, jw, V0.3 -- utf8 and latin1 for files.
# 2010-06-20, jw, V0.6 -- using Text::GuessEncoding
use POSIX;
use strict;
use warnings;

my $version = '0.6';

# Deliberately a package global rather than a lexical: the original author
# notes that Text::GuessEncoding uses it.
# NOTE(review): confirm against the module that it really reads $main::verbose.
our $verbose = 0;

# The probe functions below are called by fully qualified name, but nothing
# visible in this file loads the module. Load it at runtime so those calls
# cannot fail with "Undefined subroutine"; this is a no-op if it is already
# loaded.
require Text::GuessEncoding;

# Hand-rolled option parsing. Options are consumed from the front of @ARGV
# until the first argument that is not an option; a lone '-' is not an
# option (it names stdin) and is left in @ARGV as a file argument.
while (defined(my $arg = shift @ARGV)) {
    if ($arg eq '--') {
        # Conventional end-of-options marker, so that file names starting
        # with '-' can be passed. Previously rejected as an unknown option.
        last;
    }
    elsif ($arg !~ m{^-.}) {
        # First non-option argument: put it back and stop parsing.
        unshift @ARGV, $arg;
        last;
    }
    elsif ($arg =~ m{^(-h|--help|-\?)}) {
        # Help was requested: exit with the status returned by usage().
        exit usage();
    }
    elsif ($arg =~ m{^--?v}) {
        $verbose++;
    }
    elsif ($arg =~ m{^--?q}) {
        $verbose = 0;
    }
    else {
        # A bad command line is an error: report it and exit non-zero so
        # that calling scripts can detect the failure.
        usage("unknown option $arg");
        exit 1;
    }
}

# No file arguments and an interactive terminal on both stdin and stderr:
# probe the terminal and print the result.
if (!@ARGV and -t STDIN and -t STDERR) {
    my $r = Text::GuessEncoding::probe_tty();
    print "$r\n";
    exit 0;
}

# Probe every named file; '-' stands for standard input.
for my $file (@ARGV) {
    my $fd;
    if ($file eq '-') {
        # Reuse the existing stdin file descriptor (fdopen-style) instead
        # of opening anything by name.
        open($fd, '<&=', fileno(STDIN)) or die "open($file) failed: $!";
    }
    else {
        # Three-argument open: the file name is taken literally, so
        # whitespace or mode characters in it cannot alter what is opened.
        open($fd, '<', $file) or die "open($file) failed: $!";
    }
    # The return value is not used here, as in the original.
    # NOTE(review): presumably probe_file() reports its result itself -- confirm.
    Text::GuessEncoding::probe_file($fd, $file);
    close $fd;
}
exit 0;
########################################################################
# usage([$msg])
#
# Writes the help text to STDERR. When a true $msg is supplied, an
# "ERROR: ..." line carrying that message is appended after the help text.
# Interpolates the script name ($0), the file-level $version and the
# current $verbose value into the text.
#
# Always returns 0, so that callers can pass the result straight to exit.
sub usage {
    my ($msg) = @_;

    # Heredoc body is flush-left on purpose: its text is emitted verbatim.
    my $help_text = <<"END_OF_USAGE";
$0 V$version usage:
encoding [options] [file]
valid options are:
-h Print this online help
-v Be more verbose. Default $verbose
-q Be quiet
- Read from stdin.
Without any parameters, the terminal (if any) is probed,
using stdin and stderr.
Files are searched for characters outside the ascii range.
Those characters are tested for their likeliness in
various encodings.
Thus an illegal mix of encodings can be detected.
If not verbose, only one single word is printed to stdout:
The name of the most likely encoding.
END_OF_USAGE

    print STDERR $help_text;

    if ($msg) {
        print STDERR "\nERROR: $msg\n";
    }

    return 0;
}