The Perl Toolchain Summit 2025 Needs You: You can help 🙏 Learn more

use strict;
#Portable Executable (PE) 32 bit, little endian
#Used on MSWindows systems (including DOS) for EXEs and DLLs
#
#1999 paper:
#
#2006 with updates relevant for .NET:
sub UTCTimeStamp {
my ($name) = @_;
return Data::ParseBinary::lib::ExecPE32::UTCTimeStampAdapter->create(ULInt32($name));
}
my $msdos_header = Struct("msdos_header",
Magic("MZ"),
ULInt16("partPag"),
ULInt16("page_count"),
ULInt16("relocation_count"),
ULInt16("header_size"),
ULInt16("minmem"),
ULInt16("maxmem"),
ULInt16("relocation_stackseg"),
ULInt16("exe_stackptr"),
ULInt16("checksum"),
ULInt16("exe_ip"),
ULInt16("relocation_codeseg"),
ULInt16("table_offset"),
ULInt16("overlay"),
Padding(8),
ULInt16("oem_id"),
ULInt16("oem_info"),
Padding(20),
ULInt32("coff_header_pointer"),
Anchor("_assembly_start"),
Field("code", sub { $_->ctx->{coff_header_pointer} - $_->ctx->{_assembly_start} } ),
);
my $symbol_table = Struct("symbol_table",
String("name", 8, padchar => "\x00"),
ULInt32("value"),
Enum(
Data::ParseBinary::lib::ExecPE32::OneDownAdapter->create(SLInt16("section_number")),
#ExprAdapter(SLInt16("section_number"),
# encoder => sub { $_->obj + 1 },
# decoder => sub { $_->obj - 1 },
#),
UNDEFINED => -1,
ABSOLUTE => -2,
DEBUG => -3,
_default_ => $DefaultPass,
),
Enum(ULInt8("complex_type"),
NULL => 0,
POINTER => 1,
FUNCTION => 2,
ARRAY => 3,
),
Enum(ULInt8("base_type"),
NULL => 0,
VOID => 1,
CHAR => 2,
SHORT => 3,
INT => 4,
LONG => 5,
FLOAT => 6,
DOUBLE => 7,
STRUCT => 8,
UNION => 9,
ENUM => 10,
MOE => 11,
BYTE => 12,
WORD => 13,
UINT => 14,
DWORD => 15,
),
Enum(ULInt8("storage_class"),
END_OF_FUNCTION => 255,
NULL => 0,
AUTOMATIC => 1,
EXTERNAL => 2,
STATIC => 3,
REGISTER => 4,
EXTERNAL_DEF => 5,
LABEL => 6,
UNDEFINED_LABEL => 7,
MEMBER_OF_STRUCT => 8,
ARGUMENT => 9,
STRUCT_TAG => 10,
MEMBER_OF_UNION => 11,
UNION_TAG => 12,
TYPE_DEFINITION => 13,
UNDEFINED_STATIC => 14,
ENUM_TAG => 15,
MEMBER_OF_ENUM => 16,
REGISTER_PARAM => 17,
BIT_FIELD => 18,
BLOCK => 100,
FUNCTION => 101,
END_OF_STRUCT => 102,
FILE => 103,
SECTION => 104,
WEAK_EXTERNAL => 105,
),
ULInt8("number_of_aux_symbols"),
Array(sub { $_->ctx->{number_of_aux_symbols} },
Bytes("aux_symbols", 18)
)
);
my $coff_header = Struct("coff_header",
Magic("PE\x00\x00"),
Enum(ULInt16("machine_type"),
UNKNOWN => 0x0,
AM33 => 0x1d3,
AMD64 => 0x8664,
ARM => 0x1c0,
EBC => 0xebc,
I386 => 0x14c,
IA64 => 0x200,
M32R => 0x9041,
MIPS16 => 0x266,
MIPSFPU => 0x366,
MIPSFPU16 => 0x466,
POWERPC => 0x1f0,
POWERPCFP => 0x1f1,
R4000 => 0x166,
SH3 => 0x1a2,
SH3DSP => 0x1a3,
SH4 => 0x1a6,
SH5=> 0x1a8,
THUMB => 0x1c2,
WCEMIPSV2 => 0x169,
_default_ => $DefaultPass
),
ULInt16("number_of_sections"),
UTCTimeStamp("time_stamp"),
ULInt32("symbol_table_pointer"),
ULInt32("number_of_symbols"),
ULInt16("optional_header_size"),
FlagsEnum(ULInt16("characteristics"),
RELOCS_STRIPPED => 0x0001,
EXECUTABLE_IMAGE => 0x0002,
LINE_NUMS_STRIPPED => 0x0004,
LOCAL_SYMS_STRIPPED => 0x0008,
AGGRESSIVE_WS_TRIM => 0x0010,
LARGE_ADDRESS_AWARE => 0x0020,
MACHINE_16BIT => 0x0040,
BYTES_REVERSED_LO => 0x0080,
MACHINE_32BIT => 0x0100,
DEBUG_STRIPPED => 0x0200,
REMOVABLE_RUN_FROM_SWAP => 0x0400,
SYSTEM => 0x1000,
DLL => 0x2000,
UNIPROCESSOR_ONLY => 0x4000,
BIG_ENDIAN_MACHINE => 0x8000,
),
# symbol table
Pointer(sub { $_->ctx->{symbol_table_pointer} },
Array(sub { $_->ctx->{number_of_symbols} }, $symbol_table)
)
);
sub PEPlusField {
my ($name) = @_;
return IfThenElse($name, sub { $_->ctx->{pe_type} eq "PE32_plus" },
ULInt64(undef),
ULInt32(undef),
);
}
my $optional_header = Struct("optional_header",
# standard fields
Enum(ULInt16("pe_type"),
PE32 => 0x10b,
PE32_plus => 0x20b,
),
ULInt8("major_linker_version"),
ULInt8("minor_linker_version"),
ULInt32("code_size"),
ULInt32("initialized_data_size"),
ULInt32("uninitialized_data_size"),
ULInt32("entry_point_pointer"),
ULInt32("base_of_code"),
# only in PE32 files
If(sub { $_->ctx->{pe_type} eq "PE32" },
ULInt32("base_of_data")
),
# WinNT-specific fields
PEPlusField("image_base"),
ULInt32("section_aligment"),
ULInt32("file_alignment"),
ULInt16("major_os_version"),
ULInt16("minor_os_version"),
ULInt16("major_image_version"),
ULInt16("minor_image_version"),
ULInt16("major_subsystem_version"),
ULInt16("minor_subsystem_version"),
Padding(4),
ULInt32("image_size"),
ULInt32("headers_size"),
ULInt32("checksum"),
Enum(ULInt16("subsystem"),
UNKNOWN => 0,
NATIVE => 1,
WINDOWS_GUI => 2,
WINDOWS_CUI => 3,
POSIX_CIU => 7,
WINDOWS_CE_GUI => 9,
EFI_APPLICATION => 10,
EFI_BOOT_SERVICE_DRIVER => 11,
EFI_RUNTIME_DRIVER => 12,
EFI_ROM => 13,
XBOX => 14,
_defualt_ => $DefaultPass
),
FlagsEnum(ULInt16("dll_characteristics"),
NO_BIND => 0x0800,
WDM_DRIVER => 0x2000,
TERMINAL_SERVER_AWARE => 0x8000,
),
PEPlusField("reserved_stack_size"),
PEPlusField("stack_commit_size"),
PEPlusField("reserved_heap_size"),
PEPlusField("heap_commit_size"),
ULInt32("loader_flags"),
ULInt32("number_of_data_directories"),
Data::ParseBinary::lib::ExecPE32::NamedSequence->create(
Array(sub { $_->ctx->{number_of_data_directories} },
Struct("data_directories",
ULInt32("address"),
ULInt32("size"),
)
),
mapping => {
0 => 'export_table',
1 => 'import_table',
2 => 'resource_table',
3 => 'exception_table',
4 => 'certificate_table',
5 => 'base_relocation_table',
6 => 'debug',
7 => 'architecture',
8 => 'global_ptr',
9 => 'tls_table',
10 => 'load_config_table',
11 => 'bound_import',
12 => 'import_address_table',
13 => 'delay_import_descriptor',
14 => 'complus_runtime_header',
}
),
);
my $section = Struct("section",
String("name", 8, padchar => "\x00"),
ULInt32("virtual_size"),
ULInt32("virtual_address"),
ULInt32("raw_data_size"),
ULInt32("raw_data_pointer"),
ULInt32("relocations_pointer"),
ULInt32("line_numbers_pointer"),
ULInt16("number_of_relocations"),
ULInt16("number_of_line_numbers"),
FlagsEnum(ULInt32("characteristics"),
TYPE_REG => 0x00000000,
TYPE_DSECT => 0x00000001,
TYPE_NOLOAD => 0x00000002,
TYPE_GROUP => 0x00000004,
TYPE_NO_PAD => 0x00000008,
TYPE_COPY => 0x00000010,
CNT_CODE => 0x00000020,
CNT_INITIALIZED_DATA => 0x00000040,
CNT_UNINITIALIZED_DATA => 0x00000080,
LNK_OTHER => 0x00000100,
LNK_INFO => 0x00000200,
TYPE_OVER => 0x00000400,
LNK_REMOVE => 0x00000800,
LNK_COMDAT => 0x00001000,
MEM_FARDATA => 0x00008000,
MEM_PURGEABLE => 0x00020000,
MEM_16BIT => 0x00020000,
MEM_LOCKED => 0x00040000,
MEM_PRELOAD => 0x00080000,
ALIGN_1BYTES => 0x00100000,
ALIGN_2BYTES => 0x00200000,
ALIGN_4BYTES => 0x00300000,
ALIGN_8BYTES => 0x00400000,
ALIGN_16BYTES => 0x00500000,
ALIGN_32BYTES => 0x00600000,
ALIGN_64BYTES => 0x00700000,
ALIGN_128BYTES => 0x00800000,
ALIGN_256BYTES => 0x00900000,
ALIGN_512BYTES => 0x00A00000,
ALIGN_1024BYTES => 0x00B00000,
ALIGN_2048BYTES => 0x00C00000,
ALIGN_4096BYTES => 0x00D00000,
ALIGN_8192BYTES => 0x00E00000,
LNK_NRELOC_OVFL => 0x01000000,
MEM_DISCARDABLE => 0x02000000,
MEM_NOT_CACHED => 0x04000000,
MEM_NOT_PAGED => 0x08000000,
MEM_SHARED => 0x10000000,
MEM_EXECUTE => 0x20000000,
MEM_READ => 0x40000000,
MEM_WRITE => 0x80000000,
),
Pointer(sub { $_->ctx->{raw_data_pointer} },
Field("raw_data", sub { $_->ctx->{raw_data_size} })
),
Pointer(sub { $_->ctx->{line_numbers_pointer} },
Array(sub { $_->ctx->{number_of_line_numbers} },
Struct("line_numbers",
ULInt32("type"),
ULInt16("line_number"),
)
)
),
Pointer(sub { $_->ctx->{relocations_pointer} },
Array(sub { $_->ctx->{number_of_relocations} },
Struct("relocations",
ULInt32("virtual_address"),
ULInt32("symbol_table_index"),
ULInt16("type"),
)
)
),
);
sub min {
my @values = @_;
return undef if @values == 0;
my $ret_val = $values[0];
foreach my $val (@values) {
if ($val < $ret_val) {
$ret_val = $val;
}
}
}
our $pe32_parser = Struct("pe32_file",
# headers
$msdos_header,
$coff_header,
Anchor("_start_of_optional_header"),
$optional_header,
Anchor("_end_of_optional_header"),
Padding(sub { min(0,
$_->ctx->{coff_header}->{optional_header_size} -
$_->ctx->{_end_of_optional_header} +
$_->ctx->{_start_of_optional_header} ) }
),
# sections
Array(sub { $_->ctx->{coff_header}->{number_of_sections} }, $section),
);
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw($pe32_parser);
our @ISA;
BEGIN { @ISA = qw{Data::ParseBinary::Adapter}; }
sub _decode {
my ($self, $value) = @_;
return $value - 1;
}
sub _encode {
my ($self, $tvalue) = @_;
return $tvalue + 1;
}
our @ISA;
BEGIN { @ISA = qw{Data::ParseBinary::Adapter}; }
sub _decode {
my ($self, $value) = @_;
return $value;
#return time.ctime(obj)
}
sub _encode {
my ($self, $tvalue) = @_;
return $tvalue;
#return int(time.mktime(time.strptime(obj)))
}
our @ISA;
BEGIN { @ISA = qw{Data::ParseBinary::Adapter}; }
#"""
#creates a mapping between the elements of a sequence and their respective
#names. this is useful for sequences of a variable length, where each
#element in the sequence has a name (as is the case with the data
#directories of the PE header)
#"""
sub _init {
my ($self, %params) = @_;
die "You need to specify mapping to NamedSequence" unless $params{mapping};
$self->{mapping} = $params{mapping};
my $rev_mapping = {};
while (my ($key, $val) = each %{ $params{mapping} }) {
$rev_mapping->{$val} = $key;
}
$self->{rev_mapping} = $rev_mapping;
}
sub _decode {
my ($self, $value) = @_;
my $tvalue = {};
foreach my $ix (0..$#$value) {
my $name = $ix;
$name = $self->{mapping}->{$name} if exists $self->{mapping}->{$name};
$tvalue->{$name} = $value->[$ix];
}
return $tvalue;
}
sub _encode {
my ($self, $tvalue) = @_;
my $value = [];
while (my ($key, $val) = each %$tvalue) {
my $index = $key;
if (exists $self->{rev_mapping}->{$index}) {
$index = $self->{rev_mapping}->{$index};
} elsif ($index !~ /^\d+$/) {
die "NamedSequence: encoded value should be either a recognized name or a number";
}
$value->[$index] = $val;
}
return $value;
}
#__slots__ = ["mapping", "rev_mapping"]
#prefix = "unnamed_"
#def __init__(self, subcon, mapping):
# Adapter.__init__(self, subcon)
# self.mapping = mapping
# self.rev_mapping = dict((v, k) for k, v in mapping.iteritems())
#def _encode(self, obj, context):
# d = obj.__dict__
# obj2 = [None] * len(d)
# for name, value in d.iteritems():
# if name in self.rev_mapping:
# index = self.rev_mapping[name]
# elif name.startswith("__"):
# obj2.pop(-1)
# continue
# elif name.startswith(self.prefix):
# index = int(name.split(self.prefix)[1])
# else:
# raise ValueError("no mapping defined for %r" % (name,))
# obj2[index] = value
# return obj2
#def _decode(self, obj, context):
# obj2 = Container()
# for i, item in enumerate(obj):
# if i in self.mapping:
# name = self.mapping[i]
# else:
# name = "%s%d" % (self.prefix, i)
# setattr(obj2, name, item)
# return obj2
1;
__END__
=head1 NAME
Data::ParseBinary::Executable::PE32 - Parsing Win32 EXE / DLL files
=head1 SYNOPSIS
use Data::ParseBinary::Executable::PE32 qw{$pe32_parser};
my $data = $pe32_parser->parse(CreateStreamReader(File => $fh));
Can parse a Windows (and DOS?) EXE and DLL files. However, when building it back,
there are some minor differences from the original file, and Windows declare that
it's not a valid Win32 application.
This is a part of the Data::ParseBinary package, and is just one ready-made parser.
please go to the main page for additional usage info.
=cut