#!/usr/bin/ruby
#
## https://rosettacode.org/wiki/Natural_sorting#Sidef
#
class String {
# Sort groups of digits in number order. Sort by order of magnitude then lexically.
-> naturally { self.lc.gsub(/(\d+)/, {|s1| "0" + s1.len.chr + s1 }) + "\x0" + self };
# Collapse multiple ws characters to a single.
-> collapse { self.gsub(/(\s)\1+/, {|s1| s1 }) };
# Convert all ws characters to a space.
-> normalize { self.gsub(/(\s)/, ' ') };
# Ignore common leading articles for title sorts
-> title { self.sub(/^(?:a|an|the)\b\s*/i, '') };
# Decompose ISO-Latin1 glyphs to their base character.
-> latin1_decompose {
static tr = Hash.new(%w(
Æ AE æ ae Þ TH þ th Ð TH ð th ß ss À A Á A Â A Ã A Ä A Å A à a á a
â a ã a ä a å a Ç C ç c È E É E Ê E Ë E è e é e ê e ë e Ì I Í I Î
I Ï I ì i í i î i ï i Ò O Ó O Ô O Õ O Ö O Ø O ò o ó o ô o õ o ö o
ø o Ñ N ñ n Ù U Ú U Û U Ü U ù u ú u û u ü u Ý Y ÿ y ý y
)...);
var re = Regex.new('(' + tr.keys.join('|') + ')');
self.gsub(re, {|s1| tr{s1} });
}
}
var tests = [
[
"Task 1a\nSort while ignoring leading spaces.",
[
'ignore leading spaces: 1', ' ignore leading spaces: 4',
' ignore leading spaces: 3', ' ignore leading spaces: 2'
],
{ .trim } # builtin method.
],
[
"Task 1b\nSort while ignoring multiple adjacent spaces.",
[
'ignore m.a.s spaces: 3', 'ignore m.a.s spaces: 1',
'ignore m.a.s spaces: 4', 'ignore m.a.s spaces: 2'
],
{ .collapse }
],
[
"Task 2\nSort with all white space normalized to regular spaces.",
[
"Normalized\tspaces: 4", "Normalized\xa0spaces: 1",
"Normalized\x20spaces: 2", "Normalized\nspaces: 3"
],
{ .normalize }
],
[
"Task 3\nSort case independently.",
[
'caSE INDEPENDENT: 3', 'casE INDEPENDENT: 2',
'cASE INDEPENDENT: 4', 'case INDEPENDENT: 1'
],
{ .lc } # builtin method
],
[
"Task 4\nSort groups of digits in natural number order.",
%w(Foo100bar99baz0.txt foo100bar10baz0.txt foo1000bar99baz10.txt
foo1000bar99baz9.txt 201st 32nd 3rd 144th 17th 2 95),
{ .naturally }
],
[
"Task 5 ( mixed with 1, 2, 3 & 4 )\n"
+ "Sort titles, normalize white space, collapse multiple spaces to\n"
+ "single, trim leading white space, ignore common leading articles\n"
+ 'and sort digit groups in natural order.',
[
'The Wind in the Willows 8', ' The 39 Steps 3',
'The 7th Seal 1', 'Wanda 6',
'A Fish Called Wanda 5', ' The Wind and the Lion 7',
'Any Which Way But Loose 4', '12 Monkeys 2'
],
{ .normalize.collapse.trim.title.naturally }
],
[
"Task 6, 7, 8\nMap letters in Latin1 that have accents or decompose to two\n"
+ 'characters to their base characters for sorting.',
%w(apple Ball bald car Card above Æon æon aether
niño nina e-mail Évian evoke außen autumn),
{ .latin1_decompose.naturally }
]
];
tests.each { |case|
var code = case.pop;
var array = case.pop;
say case.pop+"\n";
say "Standard Sort:\n";
array.sort.each { .say };
say "\nNatural Sort:\n";
array.sort_by(code).each { .say };
say "\n#{'*' * 40}\n";
}