
budman
User
Feb 9, 2011, 8:46 PM
Post #4 of 5
(1715 views)
|
|
Re: [budman] How to create a hash out of this record....
[In reply to]
|
Can't Post
|
|
Because your records contain embedded (nested) tags, a purely recursive approach gets a little awkward. :) So I tokenized the record this way instead, and it works. You may need to tweak it a little more, but I think it should solve the problem for you.
#!/usr/bin/perl
# Parse records of the form
#     ID<name,value><name,<name,value><name,value>>...
# into nested hashes and dump each one with Data::Dumper.
use strict;
use warnings;
use Data::Dumper;

my @lines = (
    "101<seller_id,><bucket0,<bucket_id,100><cost,<amt,35.00><currency_code,USD>><counter_party,SFJCXFDLNFR5U>>",
    "102<seller_id,7783><bucket0,<bucket_id,107><cost,<amt,999.00><currency_code,USD><expiry,>><counter_party,SFJCXFDLNFR5U>><bucket1,<bucket_id,1000><cost,<amt,223.00><currency_code,USD>>>"
);

foreach my $line (@lines) {
    my %rec = parseRecord($line);
    print Dumper(\%rec);
}

# parseRecord($rec)
#
# Turns one tagged record into a nested hash.
#
# Grammar understood:
#     record    := ID field*
#     field     := '<' NAME ',' VALUE '>'          (leaf; VALUE may be empty)
#                | '<' NAME ',' field+ '>'         (container)
# NAME may not contain '<', '>' or ','; VALUE may not contain '<' or '>'.
#
# Parameters:
#     $rec - the record text; trailing whitespace/newline is ignored.
#
# Returns (list context):
#     ($id => \%fields), where containers are nested hash refs and leaves are
#     strings ('' for an empty value).  Returns the empty list when the record
#     has no '<' at all or contains no fields.
#
# Dies:
#     "Error: uneven bracket count\n$rec\n" when '<' and '>' do not balance;
#     "Error: malformed field at offset N\n$rec\n" when the text at offset N
#     does not fit the grammar above.
#
# If the same name appears both as a leaf and as a container at the same
# level, the container wins, regardless of the order of appearance.
sub parseRecord {
    my ($rec) = @_;
    $rec = '' unless defined $rec;

    # Tolerate an unchomped input line.
    $rec =~ s/\s+\z//;

    # Brackets must balance, not merely come in an even number: track the
    # nesting depth and stop as soon as a '>' closes something never opened.
    my $depth = 0;
    foreach my $bracket ($rec =~ /([<>])/g) {
        $depth += $bracket eq '<' ? 1 : -1;
        last if $depth < 0;
    }
    die "Error: uneven bracket count\n$rec\n" if $depth != 0;

    # The record id is everything in front of the first '<'.
    my ($id) = $rec =~ /\A([^<]*)</;
    return unless defined $id;

    my %fields;
    my @open = (\%fields);    # stack of containers currently being filled

    # Tokenize left to right with \G so nothing is skipped silently.
    pos($rec) = length $id;
    while (pos($rec) < length($rec)) {

        # A bare '>' closes the innermost open container.  Every one of a
        # run of '>' is handled here, so "...>>>" unwinds three levels.
        # The balance check above guarantees the root is never popped.
        if ($rec =~ /\G>/gc) {
            pop @open;
            next;
        }

        # Field/column name: "<name,"
        $rec =~ /\G<([^<>,]+),/gc
            or die sprintf("Error: malformed field at offset %d\n%s\n",
                           pos($rec), $rec);
        my $name = $1;
        my $node = $open[-1];

        if (substr($rec, pos($rec), 1) eq '<') {
            # Container: reuse an existing one (repeated names merge),
            # otherwise create it, replacing any scalar stored earlier.
            $node->{$name} = {} unless ref($node->{$name}) eq 'HASH';
            push @open, $node->{$name};
        }
        elsif ($rec =~ /\G([^<>]*)>/gc) {
            # Leaf: store the value verbatim ('0' stays '0'), but never
            # overwrite a container of the same name.
            my $value = $1;
            $node->{$name} = $value unless ref($node->{$name}) eq 'HASH';
        }
        else {
            die sprintf("Error: malformed field at offset %d\n%s\n",
                        pos($rec), $rec);
        }
    }

    return %fields ? ($id => \%fields) : ();
}

# some output
$VAR1 = { '101' => { 'bucket0' => { 'cost' => { 'currency_code' => 'USD', 'amt' => '35.00' }, 'counter_party' => 'SFJCXFDLNFR5U', 'bucket_id' => '100' }, 'seller_id' => '' } }; $VAR1 = { '102' => { 'bucket0' => { 'cost' => { 'expiry' => '', 'currency_code' => 'USD', 'amt' => '999.00' }, 'counter_party' => 'SFJCXFDLNFR5U', 'bucket_id' => '107' }, 'seller_id' => '7783', 'bucket1' => { 'cost' => { 'currency_code' => 'USD', 'amt' => '223.00' }, 'bucket_id' => '1000' } } }; Rich
(This post was edited by budman on Feb 9, 2011, 8:52 PM)
|