#!/local/usr/bin/perl
# C&C NLP tools
# Copyright (c) Universities of Edinburgh, Oxford and Sydney
# Copyright (c) James R. Curran
#
# This software is covered by a non-commercial use licence.
# See LICENCE.txt for the full text of the licence.
#
# If LICENCE.txt is not included in this distribution
# please email candc@it.usyd.edu.au to obtain a copy.

# takes a file of dependencies with scores and filters those below a
# threshold; also compares against a gold standard
#
# usage: perl partial_deps.pl DEPS GOLD THRESH
#
# DEPS file: an optional header whose first line starts with "# " and
#   which runs up to the next blank line, then one "SCORE DEPENDENCY"
#   pair per line, with a blank line terminating each sentence.
# GOLD file: the same optional header, then one dependency per line,
#   with a blank line terminating each sentence.
#
# The dependencies that survive the threshold are written to STDOUT
# (one blank line per sentence); the summary figures go to STDERR.

use strict;
use warnings;

die("usage: perl partial_deps.pl DEPS GOLD THRESH\n") if @ARGV < 2;

my $deps_file = shift;
my $gold_file = shift;
my $thresh    = shift;

# An omitted threshold used to compare as undef (numerically 0); make
# that explicit so the comparison below is well defined.
$thresh = 0 unless defined $thresh;

open(my $deps_fh, '<', $deps_file)
    or die("can't open deps file $deps_file: $!");
open(my $gold_fh, '<', $gold_file)
    or die("can't open gold file $gold_file: $!");

print STDOUT "\# this file was generated by the following command(s):\n";
print STDOUT "\# perl partial_deps.pl $deps_file $gold_file $thresh\n\n";

# Running totals over all sentences.
my $global_corr = 0;    # kept dependencies that are also in the gold set
my $global_p    = 0;    # kept dependencies (precision denominator)
my $global_r    = 0;    # gold dependencies (recall denominator)
my $scorr       = 0;    # sentences whose kept set equals the gold set
my $stotal      = 0;    # sentences counted for sentence accuracy

# Dependency sets of the sentence currently being processed.
my %deps = ();
my %gold = ();

# Compare the current sentence's %deps against %gold and fold the
# result into the running totals.
sub eval_deps {
    my $n_deps  = scalar(keys(%deps));
    my $n_gold  = scalar(keys(%gold));
    my $correct = grep { exists($gold{$_}) } keys(%deps);

    # Precision/recall only count sentences where both sides are non-empty.
    if ($n_deps != 0 && $n_gold != 0) {
        $global_corr += $correct;
        $global_p    += $n_deps;
        $global_r    += $n_gold;
    }

    # NOTE(review): the original condition tested %gold twice
    # ("keys(%gold) != 0 && keys(%gold) != 0"); %deps may have been
    # intended for one of the operands. Behaviour is kept as written.
    if ($n_gold != 0) {
        $stotal++;
        # Fully correct: every kept dependency is in gold and the two
        # sets have the same size.
        $scorr++ if ($correct == $n_deps && $n_deps == $n_gold);
    }
    return;
}

# Read the next sentence from the gold file into %gold, stopping at the
# blank line that ends it (or at end of file).
sub read_gold {
    %gold = ();
    while (defined(my $line = <$gold_fh>)) {
        # read comments at the beginning of gold file
        if ($line =~ /^\# /) {
            while (defined(my $skipped = <$gold_fh>)) {
                last if $skipped =~ /^$/;
            }
            next;
        }
        return if ($line =~ /^$/);
        chomp($line);
        $gold{$line} = 1;
    }
    return;
}

# Percentage of $num in $den; 0 when $den is 0 so that an empty
# evaluation reports zeros instead of dying on a division by zero.
sub _percent {
    my ($num, $den) = @_;
    return $den ? ($num * 100) / $den : 0;
}

while (defined(my $line = <$deps_fh>)) {
    # skip the comment header up to the blank line that ends it
    if ($line =~ /^\# /) {
        while (defined(my $skipped = <$deps_fh>)) {
            last if $skipped =~ /^$/;
        }
        next;
    }

    # a blank line ends the sentence: score it against the next gold sentence
    if ($line =~ /^$/) {
        print STDOUT "\n";
        read_gold();
        eval_deps();
        %deps = ();
        next;
    }

    # Lines that are not "SCORE DEPENDENCY" are skipped; the unchecked
    # match used to reuse the captures of the previous line for them.
    if ($line !~ /^(\S+) (.*)$/) {
        warn("skipping malformed deps line $.: $line");
        next;
    }
    my ($nderivs, $dep) = ($1, $2);

    if ($nderivs >= $thresh) {
        print STDOUT "$dep\n";
        $deps{$dep} = 1;
    }
}

close($deps_fh);
close($gold_fh);

my $p    = _percent($global_corr, $global_p);
my $r    = _percent($global_corr, $global_r);
my $sacc = _percent($scorr, $stotal);

print STDERR "precision: $global_corr out of $global_p = $p\n";
print STDERR "recall: $global_corr out of $global_r = $r\n";
print STDERR "sentences correct: $scorr out of $stotal = $sacc\n";