#!/usr/bin/env perl
use
Encode
qw(encode_utf8 decode_utf8 decode)
;
my
%field_spec
= (
"an"
=>
"author_name"
,
"aN"
=>
"author_name_mm"
,
"ae"
=>
"author_email"
,
"aE"
=>
"author_email_mm"
,
"cn"
=>
"committer_name"
,
"cN"
=>
"committer_name_mm"
,
"ce"
=>
"committer_email"
,
"cE"
=>
"committer_email_mm"
,
"H"
=>
"commit_hash"
,
"h"
=>
"abbrev_hash"
,
"s"
=>
"commit_subject"
,
);
my
@field_codes
=
sort
keys
%field_spec
;
my
@field_names
=
map
{
$field_spec
{
$_
} }
@field_codes
;
my
$tformat
=
join
"%x00"
,
map
{
"%"
.
$_
}
@field_codes
;
sub
_make_name_author_info {
my
(
$author_info
,
$commit_info
,
$name_key
)=
@_
;
(
my
$email_key
=
$name_key
) =~ s/name/email/;
my
$email
=
$commit_info
->{
$email_key
};
my
$name
=
$commit_info
->{
$name_key
};
my
$line
=
$author_info
->{
"email2line"
}{
$email
}
//
$author_info
->{
"name2line"
}{
$name
};
$line
//=
sprintf
"%-31s<%s>"
,
$commit_info
->{
$name_key
},
$commit_info
->{
$email_key
};
return
$line
;
}
sub
_make_name_simple {
my
(
$commit_info
,
$key
)=
@_
;
my
$name_key
=
$key
.
"_name"
;
my
$email_key
=
$key
.
"_email"
;
return
sprintf
"%s <%s>"
,
$commit_info
->{
$name_key
},
lc
(
$commit_info
->{
$email_key
});
}
sub
read_commit_log {
my
(
$author_info
,
$mailmap_info
)=
@_
;
$author_info
||= {};
open
my
$fh
,
qq(git log --pretty='tformat:$tformat' |)
;
while
(
defined
(
my
$line
= <
$fh
>)) {
chomp
$line
;
$line
= decode_utf8(
$line
);
my
$commit_info
= {};
@{
$commit_info
}{
@field_names
}=
split
/\0/,
$line
, 0 +
@field_names
;
my
$author_name_mm
= _make_name_author_info(
$author_info
,
$commit_info
,
"author_name_mm"
);
my
$committer_name_mm
=
_make_name_author_info(
$author_info
,
$commit_info
,
"committer_name_mm"
);
my
$author_name_real
= _make_name_simple(
$commit_info
,
"author"
);
my
$committer_name_real
= _make_name_simple(
$commit_info
,
"committer"
);
_check_name_mailmap(
$mailmap_info
,
$author_name_mm
,
$author_name_real
,
$commit_info
,
"author name"
);
_check_name_mailmap(
$mailmap_info
,
$committer_name_mm
,
$committer_name_real
,
$commit_info
,
"committer name"
);
$author_info
->{
"lines"
}{
$author_name_mm
}++;
$author_info
->{
"lines"
}{
$committer_name_mm
}++;
}
return
$author_info
;
}
sub
read_authors {
my
(
$authors_file
)=
@_
;
$authors_file
||=
"AUTHORS"
;
my
@authors_preamble
;
open
my
$in_fh
,
"<"
,
$authors_file
or
die
"Failed to open for read '$authors_file': $!"
;
while
(
defined
(
my
$line
= <
$in_fh
>)) {
chomp
$line
;
push
@authors_preamble
,
$line
;
if
(
$line
=~ /^--/) {
last
;
}
}
my
%author_info
;
while
(
defined
(
my
$line
= <
$in_fh
>)) {
chomp
$line
;
$line
= decode_utf8(
$line
);
my
(
$name
,
$email
);
my
$copy
=
$line
;
$copy
=~ s/\s+\z//;
if
(
$copy
=~ s/<([^<>]*)>//) {
$email
= $1;
}
elsif
(
$copy
=~ s/\s+(\@\w+)\z//) {
$email
= $1;
}
$copy
=~ s/\s+\z//;
$name
=
$copy
;
$email
//=
"unknown"
;
$email
=
lc
(
$email
);
$author_info
{
"lines"
}{
$line
}++;
$author_info
{
"email2line"
}{
$email
}=
$line
if
$email
and
$email
ne
"unknown"
;
$author_info
{
"name2line"
}{
$name
}=
$line
if
$name
and
$name
ne
"unknown"
;
$author_info
{
"email2name"
}{
lc
(
$email
) }=
$name
if
$email
and
$name
and
$email
ne
"unknown"
;
$author_info
{
"name2email"
}{
$name
}=
$email
if
$name
and
$name
ne
"unknown"
;
}
close
$in_fh
or
die
"Failed to close '$authors_file': $!"
;
return
(\
%author_info
, \
@authors_preamble
);
}
sub
update_authors {
my
(
$author_info
,
$authors_preamble
,
$authors_file
)=
@_
;
$authors_file
||=
"AUTHORS"
;
my
$authors_file_new
=
$authors_file
.
".new"
;
open
my
$out_fh
,
">"
,
$authors_file_new
or
die
"Failed to open for write '$authors_file_new': $!"
;
binmode
$out_fh
;
foreach
my
$line
(
@$authors_preamble
) {
print
$out_fh
encode_utf8(
$line
),
"\n"
or
die
"Failed to print to '$authors_file_new': $!"
;
}
foreach
my
$author
(_sorted_hash_keys(
$author_info
->{
"lines"
})) {
next
if
$author
=~ /^unknown/;
if
(
$author
=~ s/\s*<unknown>\z//) {
next
if
$author
=~ /^\w+$/;
}
print
$out_fh
encode_utf8(
$author
),
"\n"
or
die
"Failed to print to '$authors_file_new': $!"
;
}
close
$out_fh
or
die
"Failed to close '$authors_file_new': $!"
;
rename
$authors_file_new
,
$authors_file
or
die
"Failed to rename '$authors_file_new' to '$authors_file':$!"
;
return
1;
}
sub
read_mailmap {
my
(
$mailmap_file
)=
@_
;
$mailmap_file
||=
".mailmap"
;
open
my
$in
,
"<"
,
$mailmap_file
or
die
"Failed to read '$mailmap_file': $!"
;
my
%mailmap_hash
;
my
@mailmap_preamble
;
my
$line_num
= 0;
while
(
defined
(
my
$line
= <
$in
>)) {
++
$line_num
;
next
unless
$line
=~ /\S/;
chomp
(
$line
);
$line
= decode_utf8(
$line
);
if
(
$line
=~ /^
if
(!
keys
%mailmap_hash
) {
push
@mailmap_preamble
,
$line
;
}
else
{
die
encode_utf8
"Not expecting comments after header "
,
"finished at line $line_num!\nLine: $line\n"
;
}
}
else
{
$mailmap_hash
{
$line
}=
$line_num
;
}
}
close
$in
;
return
\
%mailmap_hash
, \
@mailmap_preamble
;
}
sub
merge_mailmap_with_AUTHORS_and_checkAUTHORS_data {
my
(
$mailmap_hash
,
$author_info
)=
@_
;
require
'Porting/checkAUTHORS.pl'
or
die
"No authors?"
;
my
(
$map
,
$preferred_email_or_github
)=
Porting::checkAUTHORS::generate_known_author_map();
foreach
my
$old
(
sort
keys
%$preferred_email_or_github
) {
my
$new
=
$preferred_email_or_github
->{
$old
};
next
if
$old
!~ /\@/ or
$new
!~ /\@/ or
$new
eq
$old
;
my
$name
=
$author_info
->{
"email2name"
}{
$new
};
if
(
$name
) {
my
$line
=
"$name <$new> <$old>"
;
$mailmap_hash
->{
$line
}++;
}
}
return
1;
}
sub
_sorted_hash_keys {
my
(
$hash
)=
@_
;
my
@sorted
=
sort
{
lc
(
$a
) cmp
lc
(
$b
) ||
$a
cmp
$b
}
keys
%$hash
;
return
@sorted
;
}
sub
update_mailmap {
my
(
$mailmap_hash
,
$mailmap_preamble
,
$mailmap_file
)=
@_
;
$mailmap_file
||=
".mailmap"
;
my
$mailmap_file_new
=
$mailmap_file
.
"_new"
;
open
my
$out
,
">"
,
$mailmap_file_new
or
die
"Failed to write '$mailmap_file_new':$!"
;
binmode
$out
;
foreach
my
$line
(
@$mailmap_preamble
, _sorted_hash_keys(
$mailmap_hash
),) {
print
$out
encode_utf8(
$line
),
"\n"
or
die
"Failed to print to '$mailmap_file': $!"
;
}
close
$out
;
rename
$mailmap_file_new
,
$mailmap_file
or
die
"Failed to rename '$mailmap_file_new' to '$mailmap_file':$!"
;
return
1;
}
sub
parse_mailmap_hash {
my
(
$mailmap_hash
)=
@_
;
my
@recs
;
foreach
my
$line
(
sort
keys
%$mailmap_hash
) {
my
$line_num
=
$mailmap_hash
->{
$line
};
$line
=~ /^ \s* (?: ( [^<>]*? ) \s+ )? <([^<>]*)>
(?: \s+ (?: ( [^<>]*? ) \s+ )? <([^<>]*)> )? \s* \z /x
or
die
encode_utf8
"Failed to parse line num $line_num: '$line'"
;
if
(!$1 or !$2) {
die
encode_utf8
"Both preferred name and email are mandatory "
,
"in line num $line_num: '$line'"
;
}
push
@recs
, [ $1, $2, $3, $4,
$line_num
];
}
return
\
@recs
;
}
sub
_safe_set_key {
my
(
$hash
,
$root_key
,
$key
,
$val
,
$pretty_name
)=
@_
;
$hash
->{
$root_key
}{
$key
} //=
$val
;
my
$prev
=
$hash
->{
$root_key
}{
$key
};
if
(
$prev
ne
$val
) {
die
encode_utf8
"Collision on mapping $root_key: "
.
" '$key' maps to '$prev' and '$val'\n"
;
}
}
my
$O2P
=
"other2preferred"
;
my
$O2PN
=
"other2preferred_name"
;
my
$O2PE
=
"other2preferred_email"
;
my
$P2O
=
"preferred2other"
;
my
$N2P
=
"name2preferred"
;
my
$E2P
=
"email2preferred"
;
my
$blurb
=
""
;
sub
_check_name_mailmap {
my
(
$mailmap_info
,
$auth_name
,
$raw_name
,
$commit_info
,
$descr
)=
@_
;
my
$name
=
$auth_name
;
$name
=~ s/<([^<>]+)>/<\L$1\E>/
or
$name
=~ s/(\s)(\@\w+)\z/$1<\L$2\E>/
or
$name
.=
" <unknown>"
;
$name
=~ s/\s+/ /g;
if
(!
$mailmap_info
->{
$P2O
}{
$name
}) {
warn
encode_utf8
sprintf
"Unknown %s '%s' in commit %s '%s'\n%s"
,
$descr
,
$name
,
$commit_info
->{
"abbrev_hash"
},
$commit_info
->{
"commit_subject"
},
$blurb
;
$mailmap_info
->{add}{
"$name $raw_name"
}++;
return
0;
}
elsif
(!
$mailmap_info
->{
$P2O
}{
$name
}{
$raw_name
}) {
$mailmap_info
->{add}{
"$name $raw_name"
}++;
}
return
1;
}
sub
check_fix_mailmap_hash {
my
(
$mailmap_hash
,
$authors_info
)=
@_
;
my
$parsed
= parse_mailmap_hash(
$mailmap_hash
);
my
@fixed
;
my
%seen_map
;
my
%pref_groups
;
foreach
my
$rec
(
@$parsed
) {
my
(
$pname
,
$pemail
,
$oname
,
$oemail
,
$line_num
)=
@$rec
;
$pemail
=
lc
(
$pemail
);
$oemail
=
lc
(
$oemail
)
if
defined
$oemail
;
if
(
$pname
=~ /=\?UTF-8\?/) {
$pname
= decode(
"MIME-Header"
,
$pname
);
}
my
$auth_email
=
$authors_info
->{
"name2email"
}{
$pname
};
if
(
$auth_email
) {
$pemail
=
$auth_email
;
}
my
$auth_name
=
$authors_info
->{
"email2name"
}{
$pemail
};
if
(
$auth_name
) {
$pname
=
$auth_name
;
}
if
(
$pname
ne
"unknown"
) {
if
(
my
$email
=
$seen_map
{
"name"
}{
$pname
}) {
if
(
$email
ne
$pemail
) {
warn
encode_utf8
"Inconsistent emails for name '$pname'"
.
" at line num $line_num: keeping '$email',"
.
" ignoring '$pemail'\n"
;
$pemail
=
$email
;
}
}
else
{
$seen_map
{
"name"
}{
$pname
}=
$pemail
;
}
}
if
(
$pemail
ne
"unknown"
) {
if
(
my
$name
=
$seen_map
{
"email"
}{
$pemail
}) {
if
(
$name
ne
$pname
) {
warn
encode_utf8
"Inconsistent name for email '$pemail'"
.
" at line num $line_num: keeping '$name', ignoring"
.
" '$pname'\n"
;
$pname
=
$name
;
}
}
else
{
$seen_map
{
"email"
}{
$pemail
}=
$pname
;
}
}
$pref_groups
{
"$pname $pemail"
}{
$oemail
}{
$oname
||
""
}=
[
$pname
,
$pemail
,
$oname
,
$oemail
,
$line_num
];
}
foreach
my
$pref
(_sorted_hash_keys(\
%pref_groups
)) {
my
$entries
=
$pref_groups
{
$pref
};
foreach
my
$email
(_sorted_hash_keys(
$entries
)) {
my
@names
= _sorted_hash_keys(
$entries
->{
$email
});
if
(
$names
[0] eq
""
and
@names
> 1) {
shift
@names
;
}
foreach
my
$name
(
@names
) {
push
@fixed
,
$entries
->{
$email
}{
$name
};
}
}
}
my
$new_mailmap_hash
= {};
my
$mailmap_info
= {};
foreach
my
$rec
(
@fixed
) {
my
(
$pname
,
$pemail
,
$oname
,
$oemail
,
$line_num
)=
@$rec
;
my
$preferred
=
"$pname <$pemail>"
;
my
$other
;
if
(
defined
$oemail
) {
$other
=
$oname
?
"$oname <$oemail>"
:
"<$oemail>"
;
}
if
(
$other
and
$other
ne
"<unknown>"
) {
_safe_set_key(
$mailmap_info
,
$O2P
,
$other
,
$preferred
);
_safe_set_key(
$mailmap_info
,
$O2PN
,
$other
,
$pname
);
_safe_set_key(
$mailmap_info
,
$O2PE
,
$other
,
$pemail
);
}
$mailmap_info
->{
$P2O
}{
$preferred
}{
$other
}++;
if
(
$pname
ne
"unknown"
) {
_safe_set_key(
$mailmap_info
,
$N2P
,
$pname
,
$preferred
);
}
if
(
$pemail
ne
"unknown"
) {
_safe_set_key(
$mailmap_info
,
$E2P
,
$pemail
,
$preferred
);
}
my
$line
=
$preferred
;
$line
.=
" $other"
if
$other
;
$new_mailmap_hash
->{
$line
}=
$line_num
;
}
return
(
$new_mailmap_hash
,
$mailmap_info
);
}
sub
add_new_mailmap_entries {
my
(
$mailmap_hash
,
$mailmap_info
,
$mailmap_file
)=
@_
;
my
$mailmap_add
=
$mailmap_info
->{add}
or
return
0;
my
$num
= 0;
for
my
$new
(
sort
keys
%$mailmap_add
) {
!
$mailmap_hash
->{
$new
}++ or
next
;
warn
encode_utf8
"Updating '$mailmap_file' with: $new\n"
;
$num
++;
}
return
$num
;
}
sub
read_and_update {
my
(
$authors_file
,
$mailmap_file
)=
@_
;
my
(
$author_info
,
$authors_preamble
)= read_authors(
$authors_file
);
my
(
$orig_mailmap_hash
,
$mailmap_preamble
)= read_mailmap(
$mailmap_file
);
my
(
$mailmap_hash
,
$mailmap_info
)=
check_fix_mailmap_hash(
$orig_mailmap_hash
,
$author_info
);
update_mailmap(
$mailmap_hash
,
$mailmap_preamble
,
$mailmap_file
);
read_commit_log(
$author_info
,
$mailmap_info
);
update_authors(
$author_info
,
$authors_preamble
,
$authors_file
);
add_new_mailmap_entries(
$mailmap_hash
,
$mailmap_info
,
$mailmap_file
)
and update_mailmap(
$mailmap_hash
,
$mailmap_preamble
,
$mailmap_file
,
$mailmap_info
);
return
undef
;
}
sub
main {
local
$Data::Dumper::Sortkeys
= 1;
my
$authors_file
=
"AUTHORS"
;
my
$mailmap_file
=
".mailmap"
;
my
$show_man
= 0;
my
$show_help
= 0;
GetOptions(
'help|?'
=> \
$show_help
,
'man'
=> \
$show_man
,
'authors_file|authors-file=s'
=> \
$authors_file
,
'mailmap_file|mailmap-file=s'
=> \
$mailmap_file
,
) or pod2usage(2);
pod2usage(1)
if
$show_help
;
pod2usage(
-verbose
=> 2)
if
$show_man
;
read_and_update(
$authors_file
,
$mailmap_file
);
return
0;
}
exit
(main())
unless
caller
;
1;