=head1 NAME PDL::IO::Grib - Grib file utilities for perl =head1 SYNOPSIS use PDL; use PDL::IO::Grib; $gh = new PDL::IO::Grib; $gh->readgrib("filename" [, $readdata]); $gh->getfield("fieldname"); =head1 DESCRIPTION Grib.pm allows the user to read files in the grib FORM FM 92-IX Ext. GRIB Edition 1 - it may not read all possible grib format combinations. =over 4 =item Grib::new Grib::new creates a GribHandle, which is a reference to a newly created data structure. If it receives any parameters, they are passed to Grib::readgrib; if readgrib fails, the GribHandle object is destroyed. Otherwise, it is returned to the caller. =cut ( $VERSION ) = '$Revision: 1.9 $ ' =~ /\$Revision:\s+([^\s]+)/; package PDL::IO::Grib; use FileHandle; use PDL; use PDL::IO::Misc qw(bswap2); use strict; $PDL::IO::Grib::debug=0; $PDL::IO::Grib::swapbytes=0; sub new { @_ >= 1 or barf 'usage: new Grib [FILENAME]'; my $class = shift; my $gh={}; bless $gh, $class; my $uname = `uname`; chomp($uname); use Config; $PDL::IO::Grib::swapbytes=1 if($Config{byteorder} =~ "1234"); if (@_) { $gh->readgrib(@_ ) or return undef; } } =item Grib::readgrib Grib::readgrib accepts a GribHandle plus one or two parameters. With one parameter, it reads grib header information for all variables in the specified grib file. With two parameters, the first parameter is a filename, and the second parameter indicates that all of the data in the file should be read and decoded. =cut sub readgrib { my($self,$filename,$readdata) = @_; print ">$self<>$filename<>$readdata\n" if($PDL::IO::Grib::debug); my $fh = new FileHandle "$filename" or barf "Could not open '$filename' for reading"; $self->{_FILEHANDLE} = $fh; my $offset=0; my %fields; while(!eof($fh)){ # # Read in the pds # my ($pa_ref, $trl); ($pa_ref,$trl) = readpds($fh); last unless($pa_ref); my ($ga_ref,$ba_ref); if($pa_ref->[5] & 128){ $ga_ref = readgds($fh) ; }else{ $ga_ref->[0]=0; } if($pa_ref->[5] & 64){ $ba_ref = readbms($fh) ; }else{ $ba_ref->[0]=0; } my ($data_ref,$dataloc) = readbds($fh,$pa_ref,$ga_ref,$readdata); # # It takes 5 fields to uniquely identify a grib variable we concatinate these # fields and use that as the hash ref for each variable # my $tnme = "$pa_ref->[6]:$pa_ref->[1]:$pa_ref->[7]:$pa_ref->[8]:$pa_ref->[9]"; $self->{$tnme} = bless {Total_Record_Length => $trl, Record_Offset => $offset, Data_Offset => $dataloc, PDS => $pa_ref, GDS => $ga_ref, BMS => $ba_ref, BDS => $data_ref}, "Field"; # # Why an extra ten bytes - I do not know # $offset+=$trl+10; } $self->get_grib_names(); return($self); } sub get_grib_names{ my($self) = @_; my %namehash; my $namekey; my $table; if(-e ".gribtables"){ $table = new FileHandle ".gribtables" ; }elsif(-e "$ENV{HOME}/.gribtables"){ $table = new FileHandle "$ENV{HOME}/.gribtables" ; } if(defined $table){ print "Reading table file\n"; foreach($table->getlines){ chomp; next if /^\#/; next if /^\s*$/; s/ //g; s/\#.*$//; my($name,@tmp) = split /:/,$_; my $namekey = join(':',@tmp); $namehash{$namekey}=$name; } $table->close(); foreach $namekey (keys %namehash){ my $rec; foreach $rec (keys %$self){ next if($rec =~ /^[^\d]/); # # Two ways to retreve. # # if($namehash{$namekey} eq "T_2M"){ # print "here $rec <> $namekey<\n"; # } if($rec =~ /^$namekey/){ push(@{$self->{$namehash{$namekey}}},$self->{$rec}); $self->{$rec}{name}=$namehash{$namekey}; } } } } return($self); } # What if we read into a pdl instead then only decode when needed? sub readpds{ my($fh) = @_; my ($in,$tin,@pdsarray); # # Read the header, i seem to have problems knowing exactly where it is # but the word GRIB should appear followed by another 4 bytes # my $r = read $fh, $tin, 4; while($tin ne "GRIB" && $r>0){ $r = read $fh, $in, 1; $tin = substr($tin,1).$in; } $r = read $fh, $tin, 4; $tin = pack("xa3",substr($tin,0,3)); print "header: >$tin< $r\n" if($PDL::IO::Grib::debug); return() if($r<4); # # I'm going to return the total record length # my $trl=vec($tin,0,32); # # Read in the pds length # my $rb = read $fh, $tin, 3; $in=$tin; $tin=pack("xa3",$tin); $pdsarray[0] = vec($tin,0,32); if($pdsarray[0] <= 0 || $pdsarray[0] > 500){ print "Error reading pdsarray length: $pdsarray[0]<$in>$rb\n"; } print "pdslength = $pdsarray[0]\n" if($PDL::IO::Grib::debug); read $fh, $tin, $pdsarray[0]-3; $in.=$tin; # each of the next 18 elements are stored in 1 byte my $I; for($I=1;$I<19;$I++){ $pdsarray[$I] = vec($in,$I+2,8); } # elements 8 and 9 depend on the value of 7 if($pdsarray[7]==100 || $pdsarray[7]==103 || $pdsarray[7]==105 || $pdsarray[7]==107 || $pdsarray[7]==109 || $pdsarray[7]==111 || $pdsarray[7]==113 || $pdsarray[7]==125 || $pdsarray[7]==160 ){ $pdsarray[9]+= $pdsarray[8]*256; $pdsarray[8]=0; } # element 19 is in two bytes $pdsarray[19] = vec($in,21,8)*256+vec($in,22,8); for($I=20;$I<23;$I++){ $pdsarray[$I] = vec($in,$I+3,8); } # element 23 is in two bytes $pdsarray[23] = vec($in,26,8)*256+vec($in,27,8); # skip elements 24-37 # # The following is from the HRM users guide of April 1999 # and may need to be modified for other datasets # $tin = substr($in,42,3); $tin=pack("xa3",$tin); $pdsarray[38] = vec($tin,0,32); for($I=41;$I<46;$I++){ $pdsarray[$I] = vec($in,$I+6,8); } $pdsarray[46] = vec($in,52,8)*256+vec($in,53,8); return(\@pdsarray,$trl); } sub readgds{ my($fh,$pdslength)=@_; my(@gdsarray,$tin,$in); # # Read in the GDS length # read $fh, $tin, 3; $in=$tin; $tin=pack("xa3",$tin); $gdsarray[0] = vec($tin,0,32); read $fh, $tin, $gdsarray[0]-3; $in.=$tin; $gdsarray[1] = vec($in,3,8); $gdsarray[2] = vec($in,4,8); $gdsarray[3] = vec($in,5,8); $gdsarray[4] = vec($in,6,8)*256+vec($in,7,8); $gdsarray[5] = vec($in,8,8)*256+vec($in,9,8); # print "$gdsarray[0] $gdsarray[1] $gdsarray[2] $gdsarray[3]\n"; # print "ie=$gdsarray[4] je=$gdsarray[5]\n"; # # There will be a lot of variation between models in this section # # if($gdsarray[3]==10){ # ROTATED LAT/LON GRID $tin = substr($in,10,3); $tin=pack("xa3",$tin); $gdsarray[6] = vec($tin,0,32)*1.0e-3; if($gdsarray[6]>8388.608){ $gdsarray[6]=-$gdsarray[6]+8388.608 } $tin = substr($in,13,3); $tin=pack("xa3",$tin); $gdsarray[7] = vec($tin,0,32)* 1.0e-3; if($gdsarray[7]>8388.608){ $gdsarray[7]=-$gdsarray[7]+8388.608 } $gdsarray[8] = vec($in,16,8); $tin = substr($in,17,3); $tin=pack("xa3",$tin); $gdsarray[9] = vec($tin,0,32)* 1.0e-3; if($gdsarray[9]>8388.608){ $gdsarray[9]=-$gdsarray[9]+8388.608 } $tin = substr($in,20,3); $tin=pack("xa3",$tin); $gdsarray[10] = vec($tin,0,32)* 1.0e-3; if($gdsarray[10]>8388.608){ $gdsarray[10]=-$gdsarray[10]+8388.608 } $gdsarray[11] = vec($in,23,8)*256+vec($in,24,8); $gdsarray[12] = vec($in,25,8)*256+vec($in,26,8); $gdsarray[13] = vec($in,27,8); $tin = substr($in,32,3); $tin=pack("xa3",$tin); $gdsarray[19] = vec($tin,0,32); $tin = substr($in,35,3); $tin=pack("xa3",$tin); $gdsarray[20] = vec($tin,0,32); $tin = substr($in,35,4); $tin=pack("xa4",$tin); $gdsarray[21] = vec($tin,0,32); } if($gdsarray[3]==192){ $tin = substr($in,10,3); $tin=pack("xa3",$tin); $gdsarray[4] = vec($tin,0,32); $tin = substr($in,13,3); $tin=pack("xa3",$tin); $gdsarray[5] = vec($tin,0,32); } return(\@gdsarray); } sub readbms{ my($fh)=@_; my(@bmsarray,$tin,$in); read $fh, $tin, 3; $in=$tin; $tin=pack("xa3",$tin); $bmsarray[0] = vec($tin,0,32); read $fh, $tin, $bmsarray[0]-3; $in.=$tin; $bmsarray[1] = vec($in,3,8); $bmsarray[2] = vec($in,4,8)*256+vec($in,5,8); $bmsarray[3] = substr($in,7,$bmsarray[0]-6) if($bmsarray[0]>6); return(\@bmsarray); } sub readbds{ my($fh,$pds_ref,$gds_ref,$readdata)=@_; my(@bdsarray,$tin,$in); # # Get the data section length # read $fh, $tin, 11; $in=$tin; $tin=pack("xa3",$tin); $bdsarray[0] = vec($tin,0,32); $bdsarray[1] = vec($in,3,8); my $sparebits = $bdsarray[1]&15; my $pm; if (vec($in,4,8)&0x80) {$pm=-1} else {$pm=1}; $bdsarray[2]=2**($pm*( (vec($in,4,8)&0x7F)*256+vec($in,5,8) )); $bdsarray[3] = get_ref_val(substr($in,6,4)); $bdsarray[3] = 0 if(abs($bdsarray[3])<1.0e-18); $bdsarray[4] = vec($in,10,8); print "datasize=$bdsarray[0] $bdsarray[1] scale factor=$bdsarray[2] ref_val=$bdsarray[3] bitsperval=$bdsarray[4] $sparebits\n" if($PDL::IO::Grib::debug); # # get the data location for later use # my $dataloc=$fh->getpos; if($readdata){ $bdsarray[5] = read_data($fh,$$gds_ref[4],$$gds_ref[5],\@bdsarray); }else{ seek $fh, $bdsarray[0]-10, 1; } # # read the end of record info # # # We actually read two bytes past the end of record? # my $eor; my $cnt=0; $cnt = read $fh, $eor, 6-int($sparebits/8); return(\@bdsarray,$dataloc); } sub read_data{ my($fh, $xdim, $ydim, $bds_ref,$dataloc)=@_; print "read_data: $fh, $xdim, $ydim, $bds_ref,$dataloc\n" if($PDL::IO::Grib::debug); $fh->setpos($dataloc) if($dataloc); my $dataarray = PDL->zeroes((new PDL::Type(ushort)),$xdim,$ydim); if($bds_ref->[0]-12 != $xdim*$ydim*2){ print "Error in Grib::read_data $bds_ref->[0] $xdim $ydim\n"; } read $fh, ${$dataarray->get_dataref} ,$bds_ref->[0]-12; bswap2($dataarray) if($PDL::IO::Grib::swapbytes); $dataarray = float($dataarray); unless(($bds_ref->[2] == 1) && ($bds_ref->[3] == 0)){ $dataarray=$dataarray*$bds_ref->[2]+ $bds_ref->[3]; } return($dataarray); } # Subroutine to unpack the reference values (and ref-value like # objects) from 4 bytes sub get_ref_val { my ($in)=@_; my $ref_frac=vec($in,1,8)*256*256+vec($in,2,8)*256+vec($in,3,8); my $ref_mant=vec($in,0,8); my $ref_sign; if ($ref_mant & 128) {$ref_sign=-1} else {$ref_sign=1}; $ref_mant=($ref_mant & 127) - 64; $ref_sign*$ref_frac*2**(-24)*16**($ref_mant); } sub idsort{ my(@a) = split(/:/,$a); my(@b) = split(/:/,$b); # # avoid errors due to sorting on nonnumeric fields # $a =~ /^[^\d]/ or $b =~ /^[^\d]/ or $a[0] <=> $b[0] or $a[1] <=> $b[1] or $a[2] <=> $b[2] or $a[3] <=> $b[3] or $a[4] <=> $b[4] or $a cmp $b; } =item Grib::getfield Grib::getfield accepts one parameter which is either the 5 field identifier for grib variables (pds[6]:pds[1]:pds[7]:pds[8]:pds[9] as defined in the grib format definition) or a variable name associated with that identifier as defined in the file .gribtables and returns the name and data field for that variable. Grib::getfield will check to see if the data has already been read into memory and will only read the file when this is not the case. The data field is returned as a PDL piddle or an array of PDL piddles where the identifier matchs more than one field. =cut sub getfield{ my($gh,$field) = @_; my ($he,$key); my ($level); if($field=~/^\d.*/){ push(@$he, $gh->{$field}) if(exists $gh->{$field}); }else{ if($field=~/^([^:]+):(\d+:*\d*)$/){ $field = $1; $level = $2; } print "looking for >$field<$level<\n" if($PDL::IO::Grib::debug); my @keys = reverse sort idsort keys %$gh; foreach $key (@keys){ next if($key =~ /^[^\d]/); # special keys next unless(defined $gh->{$key}{name}); print "lev: $level\n$key\n" if($PDL::IO::Grib::debug); if($gh->{$key}{name} eq $field){ if(defined $level){ next unless($key=~/:$level$/); } push(@$he,$gh->{$key}); } } } unless(defined $he){ $field.=":$level" if(defined $level); barf "Could not find match for $field in grib file"; return; } my ($href,$retval,@pdllist,$name); foreach $href (@$he){ print "href = $href\n" if($PDL::IO::Grib::debug==1); $name=$href->{name} ; $name.=":$level" if(defined $level); # # If the data has already been read in don't read it again # if(defined $href->{BDS}[5]){ push(@pdllist,$href->{BDS}[5]); }else{ print "reading data \n" if($PDL::IO::Grib::debug); $href->{BDS}[5] = read_data($gh->{_FILEHANDLE},$href->{GDS}[4], $href->{GDS}[5],$href->{BDS},$href->{Data_Offset}); push(@pdllist,$href->{BDS}[5]); } } if($#pdllist>0){ return(\@pdllist); }else{ return($pdllist[0]); } } sub getallfields{ my($gh) = @_; my $fieldlist; foreach(sort idsort keys %$gh){ next if(/^[^\d]/); # ignore special keys push(@$fieldlist,getfield($gh,$_)); } $fieldlist; } # # Create a single 3d piddle from a list of 2d piddles # sub stack2d{ my(@pdls)=@_; my $cube = PDL->zeroes($pdls[0]->type,$pdls[0]->dims,$#pdls+1); # make 3D piddle for (0.. $#pdls){ (my $tmp = $cube->slice(":,:,($_)")) .= $pdls[$_]; } return($cube); } sub stack{ my(@pdls) = @_; my $ndims = $pdls[0]->getndims; my $cube = PDL->zeroes($pdls[0]->type,$pdls[0]->dims,$#pdls+1); # make $ndims+1 piddle my $slice_str; for(1..$ndims){ $slice_str.=":,"; } for (0.. $#pdls){ (my $tmp = $cube->slice("$slice_str($_)")) .= $pdls[$_]; } return($cube); } =item Grib::showinventory Grib::showinventory prints a list of variables found in the open file and names associated with them from the .gribtables file. =cut sub showinventory{ my($gh) = @_; foreach(sort idsort keys %$gh){ next if(/^[^\d]/); # ignore special keys if(defined $gh->{$_}{name}){ print "$_ $gh->{$_}{name}\n"; }else{ print "$_\n"; } } } sub pds_attribute{ my $gh=shift; my $attnum=shift; my $ref = ref($gh); # print "ref = $ref\n"; if($ref eq "Grib"){ # print "This is a Grib ref\n"; my $prevval=-999; foreach(sort idsort keys %$gh){ next if(/^[^\d]/); # ignore special keys my $val = $gh->{$_}{PDS}[$attnum]; # print "$val\n" unless($val==$prevval); $prevval=$val; } $prevval; }elsif($ref eq "Field"){ @_ ? $gh->{PDS}[$attnum] = shift : $gh->{PDS}[$attnum]; } } sub gds_attribute{ my $gh=shift; my $attnum=shift; my $ref = ref($gh); # print "ref = $ref\n"; if($ref eq "Grib"){ # print "This is a Grib ref\n"; my $prevval=-999; foreach(sort idsort keys %$gh){ next if(/^[^\d]/); # ignore special keys my $val = $gh->{$_}{GDS}[$attnum]; # print "$val\n" unless($val==$prevval); $prevval=$val; } $prevval; }elsif($ref eq "Field"){ @_ ? ${$$gh{GDS}}[$attnum] = shift : ${$$gh{GDS}}[$attnum]; } } =item get_grib1_date() Returns the initialization date from a grib file in the form yyyymmddhh. Takes either the file name or a valid Grib handle as an input argument. =cut sub get_grib1_date{ my $gh = shift; my $date; my $passed_file_name; unless(ref($gh) eq "Grib"){ $gh = new Grib($gh); $passed_file_name=1; } $date = ($gh->pds_attribute(10)+1900)*1000000+ $gh->pds_attribute(11)*10000 +$gh->pds_attribute(12)*100+$gh->pds_attribute(13); $gh->close if($passed_file_name==1); $date; } sub close { my($gh) = @_; $gh->{_FILEHANDLE}->close; } 1; =item .gribtables The .gribtables file is searched for first in the working directory then in the user's home directory. The format is rather simple - anything following a B<#> sign is a comment otherwise a name:pds[6]:pds[1]:pds[7]:pds[8]:pds[9] is expected where name can be anything as long as it begins with an alpha character and does not containb{:} and the pds[#] refers to the element number in the pds sector of the grib file. Fields 8 and 9 are optional in the file and if not found all of the records which match fields 6 1 7 will be combined into a single 3d dataset by getfield, unless the name passed in get field is modified by :pds[8]:pds[9] (or simply :pds[9]) which can be used to get a slice of what would otherwise be a 3d variable. So suppose the file .gribtables contains the entry T_PG:11:2:100 to specify 3d temperature on a pressure grid, then to get the 500mb pressure you would do $t500 = $gh->getfield ("T_PG:500"); =back =head1 Author Jim Edwards <jedwards@inmet.gov.br> =cut