#!/usr/bin/perl -w

# Minimum Error Rate Training
# (c) 2002-2006 Philipp Koehn
# Train a phrase model from a parallel corpus
# Version released for WMT 2006 Shared Task

# Usage:
# minimum-error-rate-training.perl <working-dir> <french> <english> <n-best> <decoder> <params> <lambdas> <start-step>

# Notes:
# <french> and <english> should be raw text files, one sentence per line
# <english> can be a prefix, in which case the files are <english>0, <english>1, etc.
# <start-step> is what iteration to start at (default 1). If you add an 'a'
#   suffix the decoding half of that iteration will be skipped.

# Revision history

# 21 Jan 2005 unified various versions, thorough cleanup (DWC)
#  now indexing accumulated n-best list solely by feature vectors

# 14 Dec 2004 reimplemented find_threshold_points in C (NMD)

# 25 Oct 2004 Use either average or shortest (default) reference
#  length as effective reference length (DWC)

# 13 Oct 2004 Use alternative decoders (DWC)

# 30 Sep 2004 Modified for local environment (DWC):
#  - for Perl 5.8 (binary file I/O)
#  - for Carmel 3.0 (change in output format)

# Original version by Philipp Koehn

use Inline C => DATA =>;
use strict;
#use utf8;

my $SMALL = 1e-10;

my $___WORKING_DIR = $ARGV[0];
my $___DEV_F = $ARGV[1];
my $___DEV_E = $ARGV[2];
my $___N_BEST_LIST_SIZE = $ARGV[3];
my $___DECODER = $ARGV[4]; 
my $___PARAMETERS = $ARGV[5];
my $___LAMBDA = $ARGV[6];
my $___START = $ARGV[7];

my $verbose = 4;
my $reference_length_mode = "shortest"; 
my $start_run = 1;
my $skip_decoder = 0;
if ($___START) {
  if ($___START =~ /(\d+)a/) {
    $start_run = $1;
    $skip_decoder = 1;
  } elsif ($___START =~ /\d+/) {
    $start_run = $___START;
    $skip_decoder = 0;
  } else {
    print "Bad start step: $___START\n";
  }
}

my $n_random = 20;

my $lambda_ranges = $___LAMBDA;

# Transform lambda option into decoder options
my (@LAMBDA,@MIN,@MAX,$rand,$decoder_config);

if ($lambda_ranges =~ /lm/) {
    foreach (split(/\s+/,$lambda_ranges)) {
	my ($name,$values) = split(/:/);
	$decoder_config .= "-$name ";
	foreach my $feature (split(/;/,$values)) {
	    if ($feature =~ /^(-?[\.\d]+),(-?[\.\d]+)-(-?[\.\d]+)$/) {
		if ($1 == $2 && $2 == $3) {
		    $decoder_config .= "$1 ";
		}
		else {
		    push @LAMBDA, $1;
		    push @MIN, $2;
		    push @MAX, $3;
		    $decoder_config .= "%.6f ";
		    $rand .= "$2+rand(".($3-$2)."), ";
		}
	    }
	    else {
		print "BUGGY FEATURE RANGE DEFINITION: $name => $feature\n";
	    }
	} 
    }
}
else {
    foreach my $feature (split(/;/,$lambda_ranges)) {
	if ($feature =~ /^(-?[\.\d]+),(-?[\.\d]+)-(-?[\.\d]+)$/) {
	    push @LAMBDA, $1;
	    push @MIN, $2;
	    push @MAX, $3;
	    $rand .= "$2+rand(".($3-$2)."), ";
	}
	else {
	    print "BUGGY FEATURE RANGE DEFINITION: $feature\n";
	} 
    }
}
print "RANGE: $rand\n"; 

print "filtering the phrase table... ".`date`;
$___PARAMETERS =~ s/ \-f / -config /;
$___PARAMETERS =~ s/^\-f /-config /;
my $PARAMETERS;
if ($___PARAMETERS =~ /-config (\S+)/) {
    my $config = $1;
    my $filtered_parameters = $___PARAMETERS;
    $filtered_parameters =~ s/-config *(\S+)//;
    print "run-filtered-pharaoh.perl $___WORKING_DIR/filtered $___DECODER $config $___DEV_F \"$filtered_parameters -norun\"\n";
    `run-filtered-pharaoh.perl $___WORKING_DIR/filtered $___DECODER $config $___DEV_F "$filtered_parameters -norun"`;
    $PARAMETERS = $___PARAMETERS;
    $PARAMETERS =~ s/-config (\S+)/-config $___WORKING_DIR\/filtered\/pharaoh.ini/;
    if ($PARAMETERS =~ /^(.*)-distortion-file +(\S.*) (-.+)$/ || 
	$PARAMETERS =~ /^(.*)-distortion-file +(\S.*)()$/) {
	my ($pre,$files,$post) = ($1,$2,$3);
	$PARAMETERS = "$pre -distortion-file ";
	foreach my $distortion (split(/ +/,$files)) {
	    my $out = $distortion;
	    $out =~ s/^.*\/+([^\/]+)/$1/g;
	    $PARAMETERS .= "$___WORKING_DIR/filtered/$out";
	}
	$PARAMETERS .= $post;
    }
}

print "reading reference translations... ".`date`;
my (@DEV_E,$sentence_count,$multi_flag,$length_reference);
if (-e $___DEV_E) {
    open DEV_E_FILE, "<:raw", $___DEV_E;
    #@DEV_E = `cat $___DEV_E`; 
    @DEV_E = <DEV_E_FILE>;
    close(DEV_E_FILE);
    chop(@DEV_E);
    $sentence_count = scalar(@DEV_E);
    normalize_text(\@DEV_E);
    $length_reference = compute_length(\@DEV_E);
}
else {
    $multi_flag = 1;
    $length_reference = 0;
    my $part = 0;
    while (-e "$___DEV_E$part") {
	open DEV_E_PART_FILE, "<:raw", "$___DEV_E$part";
	my @DEV_E_PART = <DEV_E_PART_FILE>;
	close(DEV_E_PART_FILE);
	chop(@DEV_E_PART);
	normalize_text(\@DEV_E_PART);
	$length_reference += compute_length(\@DEV_E_PART) if ($reference_length_mode eq "average");
	push @DEV_E,\@DEV_E_PART;
	$part++;
    }
    print "Read $part reference sets.\n";
    $length_reference = int($length_reference/$part) if ($reference_length_mode eq "average");
    
    $sentence_count = scalar(@{$DEV_E[0]});

    if ($reference_length_mode eq "shortest") {
	for(my $sentence=0;$sentence<$sentence_count;$sentence++) {
	    my $shortest_ref_length;
	    for (my $part=0;$part<scalar(@DEV_E);$part++) {
		my @WORD = split(/ /, $DEV_E[$part][$sentence]);
		$shortest_ref_length = scalar @WORD # find the shortest reference segment
		    if (not defined $shortest_ref_length) or @WORD < $shortest_ref_length;
	    }
	    $length_reference += $shortest_ref_length;
	}
    }
}

sub normalize_text {
    my ($CORPUS) = @_;
    foreach (@{$CORPUS}) {
	s/^ +//; s/ +$//; s/ +/ /g;
    }
}

sub compute_length {
    my ($CORPUS) = @_;
    my $length = 0;
    foreach (@{$CORPUS}) {
	my @WORD = split;
	$length += scalar(@WORD);
    }
    return $length;
}

`mkdir -p $___WORKING_DIR`;

my $run=$start_run-1;
my $prev_size = -1;
my $best_random_bleu;
while(1) {
  $run++;
  # run beamdecoder with option to output lattices
  # the end result should be (1) @NBEST_LIST, a list of lists; (2) @SCORE, a list of lists of lists

  print "($run) run decoder to produce n-best lists\n";

  my @NBEST_LIST;
  my @SCORE;
  my $size = &run_decoder_nbest(\@LAMBDA, \@NBEST_LIST, \@SCORE);
  last if $size <= $prev_size;
  $prev_size = $size;

  my @BLEU_IMPACT;
  &precompute_bleu_impact(\@NBEST_LIST,\@BLEU_IMPACT);

  # for debugging output only?
  &compute_bleu(\@LAMBDA,\@SCORE,\@NBEST_LIST,\@BLEU_IMPACT,"print_best");

  # compute threshold points for each sentence, for each parameter
  print "($run) optimize parameters\n";

  # iterate through a number of random starting points
  $best_random_bleu = 0;
  my $best_random;
  my @BEST_RANDOM_LAMBDA;
  for(my $random=1;$random<=$n_random;$random++) {
    my $subrun = 0;
    if ($random>1) { # for $random = 1, use old best
#      @LAMBDA = (.5+rand(1), .5+rand(1), .25+rand(.5), .25+rand(.5), .25+rand(.5), .25+rand(0.25), -0.5+rand(1), -0.5+rand(1));
#      @LAMBDA = (.5+rand(1), .5+rand(1), .25+rand(.5), .25+rand(.5), .25+rand(.5), .25+rand(0.25), -0.5+rand(1));
#      @LAMBDA = (.5+rand(1), .5+rand(1), .25+rand(.5), .5+rand(.5), .25+rand(.5), -0.5+rand(1));
	@LAMBDA = ();
	for(my $m=0;$m<scalar(@MIN);$m++) {
	    push @LAMBDA,$MIN[$m]+rand($MAX[$m]-$MIN[$m]);
	}
#      @LAMBDA = (.5+rand(1), .5+rand(1), .5+rand(.5), .125+rand(.25), -0.5+rand(1));
    }
    
    my $best_bleu = &compute_bleu(\@LAMBDA,\@SCORE,\@NBEST_LIST,\@BLEU_IMPACT);

    # greedy search...
    print "($run.$random) INIT BLEU: $best_bleu\n";
    my $best_lambda = -1;
    while(1) {
	my $skip_lambda = $best_lambda;
	$best_lambda = -1;
      my $check_best_bleu = &compute_bleu(\@LAMBDA,\@SCORE,\@NBEST_LIST,\@BLEU_IMPACT);
      print "BETTERING $best_bleu\n";
      if (abs($best_bleu - $check_best_bleu) > $SMALL) {
	  # This can happen in the case of a tie between translations
	  print "warning: inconsistent BLEU scores ($best_bleu != $check_best_bleu)\n";
	  # Keep the BLEU score obtained by the previous iteration
	  # by find_threshold_points+find_max_bleu. If we don't, we
	  # could end up trying to better the same point ad infinitum.
      }

      my $best_weight;

      # optimize in each direction, choose best direction
      for(my $lambda=0;$lambda<=$#LAMBDA;$lambda++) {
	  next if ($lambda == $skip_lambda); # because we already globally optimized this lambda
	my (%THRESHOLD,@BEST,%BLEU_CHANGE);

	#&find_threshold_points(\@SCORE,\@BLEU_IMPACT,$lambda,\%THRESHOLD,\@BEST,\%BLEU_CHANGE);
	&find_threshold_points_c(\@SCORE,\@BLEU_IMPACT,$lambda,\%THRESHOLD,\@BEST,\%BLEU_CHANGE, $sentence_count, \@LAMBDA, $verbose);
	my ($bleu,$weight) = &find_max_bleu(\%THRESHOLD,\@BEST,\%BLEU_CHANGE,\@BLEU_IMPACT, \@LAMBDA,\@SCORE,\@NBEST_LIST);
	
	print "BEST BLEU (find): $lambda, $bleu, $weight\n" ;
#      ($bleu,$weight) = &compute_max_bleu(\%THRESHOLD,$lambda,\@SCORE,\@NBEST_LIST,\@BLEU_IMPACT);
#      print "BEST BLEU (comp): $lambda, $bleu, $weight\n" ;
	
	# compute bleu for each interval, find best
	if ($bleu>$best_bleu+.000001) {
	  print "BETTER BLEU: $bleu>$best_bleu+.000001 ($lambda)\n";
	  $best_bleu = $bleu;
	  $best_weight = $weight;
	  $best_lambda = $lambda;
	}
      }
      last if $best_lambda == -1;
      
      $subrun++;
      print "($run.$random.$subrun) BEST BLEU: (";
      for(my $i=0;$i<=$#LAMBDA;$i++) {
	print "," if $i;
	if ($i == $best_lambda) { print "*$best_weight*"; }
	else { print $LAMBDA[$i]; }
      }
      print ") = $best_bleu\n";
      
      $LAMBDA[$best_lambda] = $best_weight;
    }
    # print
    print "($run.$random) BEST RANDOM BLEU: (";
    for(my $i=0;$i<=$#LAMBDA;$i++) {
      print "," if $i;
      print $LAMBDA[$i];
    }
    print ") = $best_bleu\n";

    # normalize ...
    my $sum = 0;
    for(my $i=0;$i<=$#LAMBDA;$i++) { 
      # Make absolute values sum to one. Don't know if this
      # is really the right thing to do. (DWC)
      $sum += abs($LAMBDA[$i]);
    }
    for(my $i=0;$i<=$#LAMBDA;$i++) {
      $LAMBDA[$i] /= $sum;
    }

    # print normalized
    print "($run.$random) NORMALIZED BEST RANDOM BLEU: (";
    for(my $i=0;$i<=$#LAMBDA;$i++) {
      print "," if $i;
      print $LAMBDA[$i];
    }
    print ") = $best_bleu\n";
    
    # best if better bleu 
    if($best_bleu > $best_random_bleu) {
      for(my $i=0;$i<=$#LAMBDA;$i++) {
	$BEST_RANDOM_LAMBDA[$i] = $LAMBDA[$i];
      }
      $best_random = $random;
      $best_random_bleu = $best_bleu;
    }
  }
  # global lambda is best random
  for(my $i=0;$i<=$#LAMBDA;$i++) {
    $LAMBDA[$i] = $BEST_RANDOM_LAMBDA[$i];
  }
  print "($run) keeping point $best_random for next run: (";
    for(my $i=0;$i<=$#LAMBDA;$i++) {
      print "," if $i;
      print $LAMBDA[$i];
    }
    print ") = $best_random_bleu\n";
  &compute_bleu(\@LAMBDA,\@SCORE,\@NBEST_LIST,\@BLEU_IMPACT,"print_best");
}
&create_config();

# find points along line at which best translation changes
# store in @THRESHOLD the intersection values of lambda
# store in @BEST the best translation for each sentence for lambda->-inf
# store in @BLEU_CHANGE the change in counts from previous interval

sub find_threshold_points {
  my ($SCORE,$BLEU_IMPACT,$lambda,$THRESHOLD,$BEST,$BLEU_CHANGE) = @_;
  for(my $sentence=0;$sentence<$sentence_count;$sentence++) {
    
    

    # line equation: score = $incline * lambda + $fixed_score


    # find the best translation with lim(lambda->-inf)
    
    my $best;
    my $best_decline = 1e100;
    my $best_score = -1e100;
    for(my $translation=0;$translation<=$#{$$SCORE[$sentence]};$translation++) {
      my $score = 0;
      for(my $i=0;$i<=$#LAMBDA;$i++) {
	$score += $LAMBDA[$i] * ${$SCORE}[$sentence][$translation][$i];
      }
      my $decline = ${$SCORE}[$sentence][$translation][$lambda];
      if ($decline < $best_decline
	  || ($decline == $best_decline && $score>$best_score )) {
	$best_decline = $decline;
	$best_score = $score;
	$best = $translation;
      }
    }
    $$BEST[$sentence] = $best;
    
    # (iteratively) find intersection points to the right
    print "lambda $lambda, sentence $sentence:\n\tinitial best: $best\n"
	if $verbose>=5;
    


    # score without this feature (weighted)
    my $best_fixed_score = $best_score - $LAMBDA[$lambda] * ${$SCORE}[$sentence][$best][$lambda];
    my $last_threshold = -1e100;
    while(1) {

      # score for this feature
      my $right;
      my $right_fixed_score;
      my $right_incline;
      my $right_threshold = 1e100;


      for(my $translation=0;$translation<=$#{${$SCORE}[$sentence]};$translation++) {
	
	# must be steeper, otherwise it could never overcome it
	my $best_incline = ${$SCORE}[$sentence][$best][$lambda];
	my $incline = ${$SCORE}[$sentence][$translation][$lambda];
	if ($incline > $best_incline) {
	  
	  # compute intersection point
	  my $fixed_score = 0;
	  for(my $i=0;$i<=$#LAMBDA;$i++) {
	    if ($i != $lambda) {
	      $fixed_score += $LAMBDA[$i] * ${$SCORE}[$sentence][$translation][$i];
	    }
	  }
	  my $threshold = - (($best_fixed_score - $fixed_score) / 
			     ($best_incline - $incline));
	  
	  # intersection point must be to the right of the last intersection point (should always be true)
	  if ($threshold > $last_threshold) {
	    # and must be the closest to the left
#	    print "\t\t$translation: ($threshold == $right_threshold && $incline < $right_incline)\n";
	    if ($threshold < $right_threshold ||
		# special case: same threshold, smaller incline 
		# (may happen at threshold 0)
		($threshold == $right_threshold && $incline > $right_incline)){
	      $right = $translation;
	      $right_fixed_score = $fixed_score;
	      $right_threshold = $threshold;
	      $right_incline = $incline;
	    }
	  }
	}      
      }
      
      last if ($right_threshold == 1e100);
      print "\tnext best $right at $right_threshold (".
	#$right_fixed_score+$right_threshold*$right_incline=".
	($right_fixed_score+$right_threshold*$right_incline).")\n"
	if $verbose >= 5;

      # using a hash allows intersection lists to be merged
      $$THRESHOLD{$right_threshold}++;

      for(my $ngram=1;$ngram<=4;$ngram++) {
	${$BLEU_CHANGE}{$right_threshold}{CORRECT}[$ngram]
	  += $$BLEU_IMPACT[$sentence][$right]{CORRECT}[$ngram]
	  -  $$BLEU_IMPACT[$sentence][$best ]{CORRECT}[$ngram];
	${$BLEU_CHANGE}{$right_threshold}{TOTAL}[$ngram]
	  += $$BLEU_IMPACT[$sentence][$right]{TOTAL}[$ngram]
	  -  $$BLEU_IMPACT[$sentence][$best ]{TOTAL}[$ngram];
      }
      ${$BLEU_CHANGE}{$right_threshold}{LENGTH} 
        += $$BLEU_IMPACT[$sentence][$right]{LENGTH}
        -  $$BLEU_IMPACT[$sentence][$best]{LENGTH};
      
      $best = $right;
      $best_fixed_score = $right_fixed_score;
      $last_threshold = $right_threshold;
    }
  }
}

sub compute_max_bleu {
  my ($THRESHOLD,$lambda,$SCORE,$NBEST_LIST,$BLEU_IMPACT) = @_;
  my $max_bleu = 0;
  my $best_weight;
  $$THRESHOLD{-1e100}++;
  $$THRESHOLD{1e100}++;
  
  my @THRESHOLD = sort { $a <=> $b } keys %{$THRESHOLD};
  $THRESHOLD[0] = $THRESHOLD[1] - 0.1; # bug
  $THRESHOLD[$#THRESHOLD] = $THRESHOLD[$#THRESHOLD-1] + 0.1;
  print "found $#THRESHOLD points for $lambda\n";
  for(my $i=0;$i<$#THRESHOLD;$i++) {
    my @CHANGED_LAMBDA = @LAMBDA;
    my $weight = ( $THRESHOLD[$i] + $THRESHOLD[$i+1] ) / 2;
    $CHANGED_LAMBDA[$lambda] = $weight;
    my $bleu = &compute_bleu( \@CHANGED_LAMBDA,$SCORE,$NBEST_LIST,$BLEU_IMPACT );
    if ($bleu > $max_bleu) {
      $max_bleu = $bleu;
      $best_weight = $weight;
    }
  }
  return ($max_bleu,$best_weight);
}

# Count n-gram matches from @NBEST_LIST, put results in @BLEU_IMPACT

sub precompute_bleu_impact {
  my ($NBEST_LIST,$BLEU_IMPACT) = @_;
  for(my $sentence=0;$sentence<$sentence_count;$sentence++) {
    for(my $translation=0;$translation<=$#{${$NBEST_LIST}[$sentence]};$translation++) {
      my (@TOTAL,@CORRECT);
      for(my $length=1; $length<=4; $length++) {
	$TOTAL[$length] = 0;
	$CORRECT[$length] = 0;
      }
      my @TRANSLATION = split(/ /,${$NBEST_LIST}[$sentence][$translation]);
      my %REF_GRAM;
      if ($multi_flag) {
	  for(my $part=0;$part<scalar(@DEV_E);$part++) {
	      my %REF_GRAM_PART;
	      &collect_counts_for_ref($DEV_E[$part][$sentence],\%REF_GRAM_PART);
	      foreach (keys %REF_GRAM_PART) {
		  if (!defined($REF_GRAM{$_}) || 
		      $REF_GRAM{$_} < $REF_GRAM_PART{$_}) {
		      $REF_GRAM{$_} = $REF_GRAM_PART{$_};
		  }
	      }
	  }
      }
      else {
	  &collect_counts_for_ref($DEV_E[$sentence],\%REF_GRAM);
      }
      
      # ... collect matches for system translation
      for(my $length=1; $length<=4; $length++) {
	for(my $start=0; $start <= $#TRANSLATION - ($length-1); $start++) {
	  my $ngram = "";
	  for(my $w=$start; $w<$start+$length; $w++) {
	    $ngram .= " " unless $w == $start;
	    $ngram .= $TRANSLATION[$w];
	  }
	  if (defined($REF_GRAM{$ngram}) && $REF_GRAM{$ngram} > 0) {
	    $REF_GRAM{$ngram}--;
	    $CORRECT[$length]++;
	  }
	}
	$TOTAL[$length] += scalar(@TRANSLATION) - ($length-1);
      }
      ${$BLEU_IMPACT}[$sentence][$translation]{CORRECT} = \@CORRECT;
      ${$BLEU_IMPACT}[$sentence][$translation]{TOTAL} = \@TOTAL;
      ${$BLEU_IMPACT}[$sentence][$translation]{LENGTH} = scalar @TRANSLATION;
    }
  }
}

sub collect_counts_for_ref {
    my ($sentence,$REF_GRAM) = @_;
    my @REFERENCE = split(/ /,$sentence);
    for(my $length=1; $length<=4; $length++) {
	for(my $start=0; $start<=$#REFERENCE-($length-1);$start++) {
	    my $ngram = "";
	    for(my $w=$start; $w<$start+$length; $w++) {
		$ngram .= " " unless $w == $start;
		$ngram .= $REFERENCE[$w];
	    }
	    $$REF_GRAM{$ngram}++;
	}
    }
}

# maximize BLEU score along line

sub find_max_bleu {
  my($THRESHOLD,$BEST,$BLEU_CHANGE,$BLEU_IMPACT, $LAMBDA,$SCORE,$NBEST_LIST) = @_;

  my @THRESHOLD = sort { $a <=> $b } keys %{$THRESHOLD};
  push @THRESHOLD, $THRESHOLD[$#THRESHOLD] + 0.1;
  print "found $#THRESHOLD points\n";

  my (@CORRECT,@TOTAL,$length);
  for(my $sentence=0;$sentence<$sentence_count;$sentence++) {
    my $best = $$BEST[$sentence];
    for(my $ngram=1;$ngram<=4;$ngram++) {
      $CORRECT[$ngram] += $$BLEU_IMPACT[$sentence][$best]{CORRECT}[$ngram];
      $TOTAL  [$ngram] += $$BLEU_IMPACT[$sentence][$best]{TOTAL  }[$ngram];
    }
    $length += $$BLEU_IMPACT[$sentence][$best]{LENGTH};
  }
  my $max_bleu = &factorize_bleu(\@CORRECT,\@TOTAL,$length,0);
  my $best_weight = $THRESHOLD[0] - 0.1;
  my $best_i = -1;
  for(my $i=0;$i<$#THRESHOLD;$i++) {
    for(my $ngram=1;$ngram<=4;$ngram++) {
      $CORRECT[$ngram] += $$BLEU_CHANGE{$THRESHOLD[$i]}{CORRECT}[$ngram];
      $TOTAL  [$ngram] += $$BLEU_CHANGE{$THRESHOLD[$i]}{TOTAL  }[$ngram];
    }
    $length += $$BLEU_CHANGE{$THRESHOLD[$i]}{LENGTH};
    #print "LAMBDA $THRESHOLD[$i] ($i): ";
    my $bleu = &factorize_bleu(\@CORRECT,\@TOTAL,$length,0);
    if ($bleu > $max_bleu) {
      $max_bleu = $bleu;
      if (abs($THRESHOLD[$i] - $THRESHOLD[$i+1]) < $SMALL) {
	  print "splitting hairs: threshold point $i, $THRESHOLD[$i] and $THRESHOLD[$i+1]\n";
      }
      $best_weight = ($THRESHOLD[$i] + $THRESHOLD[$i+1]) / 2;
      $best_i = $i;
    }
  }
  # printf "best is point between $best_i and %d\n", $best_i+1;
  return ($max_bleu,$best_weight);
}

# compute BLEU score of 1-best translations

sub compute_bleu {
  my ($LAMBDA,$SCORE,$NBEST_LIST,$BLEU_IMPACT,$print_best) = @_;
  my (@TOTAL,@CORRECT,$length_translation);

  for(my $sentence=0;$sentence<$sentence_count;$sentence++) {

    # find best translation
    my $best;
    my $best_score = -1e100;
    for(my $translation=0;$translation<=$#{${$SCORE}[$sentence]};$translation++) {
      my $score = 0;
      for(my $i=0;$i<=$#LAMBDA;$i++) {
	$score += $$LAMBDA[$i] * ${$SCORE}[$sentence][$translation][$i];
      }
      if ($score>$best_score) {
	$best_score = $score;
	$best = $translation;
      }
    }

    print "($run) BEST TRANSLATION for $sentence is $best, score $best_score (length $$BLEU_IMPACT[$sentence][$best]{LENGTH}, ".(scalar(@{$$SCORE[$sentence]}))." scores, ".(scalar(@{$$NBEST_LIST[$sentence]}))." text translations): $$NBEST_LIST[$sentence][$best]\n" if $print_best;

    # add counts for this sentence's best translation
    for(my $length=1; $length<=4; $length++) {
      $CORRECT[$length] += $$BLEU_IMPACT[$sentence][$best]{CORRECT}[$length];
      $TOTAL  [$length] += $$BLEU_IMPACT[$sentence][$best]{TOTAL  }[$length];
    }
    $length_translation += $$BLEU_IMPACT[$sentence][$best]{LENGTH};
  }

  # ... compute blue
  return &factorize_bleu(\@CORRECT,\@TOTAL,$length_translation,1);
}

sub factorize_bleu {
  my ($CORRECT,$TOTAL,$length,$print) = @_;
  my $brevity_penalty = 1;

  if ($length<$length_reference) {
    $brevity_penalty = exp(1-$length_reference/$length);
  }
  my $precision = 0;

  print "BLEU for ($length_reference/$length " if $print;

  for(my $length=1; $length<=4; $length++) {
    if ($$CORRECT[$length] == 0) {
      $precision += -9999999999;
    }
    else {
      $precision += log($$CORRECT[$length]/$$TOTAL[$length]);
      print " $$CORRECT[$length]/$$TOTAL[$length]" if $print;
    }
  }

#  for(my $i=0;$i<=$#LAMBDA;$i++) {
#    print "," if $i;
#    print $$LAMBDA[$i];
#  }
  print ") = ".($brevity_penalty * exp($precision/4))."\n" if $print;

  return $brevity_penalty * exp($precision/4);
}

sub run_decoder_nbest {
  my ($LAMBDA, $NBEST_LIST, $SCORE) = @_;
  my @ALREADY;
  
  my $filename_template = "$___WORKING_DIR/run%d.best$___N_BEST_LIST_SIZE.out";

  my $size = 0;

  # Collect translations from all previous decoder runs, and the current decoder run
  for (my $r=$run; $r>=1; $r--) {
    my $filename = sprintf $filename_template, $r;
    if ($r == $run && (!$skip_decoder || $run > $start_run)) { 
      my $weights = join(' ', @$LAMBDA);
      $weights = $decoder_config." CONFIG ".$weights if $decoder_config;

      # Current decoder run
      if ($___DECODER =~ /(200\d-\d\d-\d\d)/ && $1 ge '2005-07-21') {
	  my $decoder_cmd = "$___DECODER $PARAMETERS ".sprintf($decoder_config,@{$LAMBDA})." -n-best-list $filename $___N_BEST_LIST_SIZE < $___DEV_F > $___WORKING_DIR/run$run.out";
	  print "$decoder_cmd\n";
	  `$decoder_cmd`;
      }	  
      else {
	  print "mert-pharaoh-wrapper.perl $___N_BEST_LIST_SIZE \"$weights\" $___WORKING_DIR/run$run \"$PARAMETERS\" $___DECODER < $___DEV_F > $filename\n";
	  `mert-pharaoh-wrapper.perl $___N_BEST_LIST_SIZE \"$weights\" $___WORKING_DIR/run$run \"$PARAMETERS\" $___DECODER < $___DEV_F > $filename\n`;
      }
    }

    open DECODER, "<:raw", $filename;
    print "Scanning $filename\n";
	while (<DECODER>) {
            my ($sentence, $translation, $scores) = split(/\|\|\|/);
            # Use the scores as a unique identifier for the translation.
            # This may seem extreme, but since the lambdas are the only
            # knobs we control here, we can't separate translations with
            # the same feature scores anyway.
            # In the case of a tie, we give priority to later
            # runs over earlier runs, and within a run, we give priority
            # to earlier translations over later ones.
            $translation =~ s/^\s+//; $translation =~ s/\s+$//; $translation =~ s/\s+/ /g;
            $scores =~ s/^\s+//; $scores =~ s/\s+$//; $scores =~ s/\s+/ /g;
	    if (!$ALREADY[$sentence]->{$scores}) {
	       push @{$$NBEST_LIST[$sentence]}, $translation;
	       push @{$$SCORE[$sentence]}, [ split(/\s+/, $scores) ];
	       $ALREADY[$sentence]->{$scores} = 1;
               $size++;
            }
	} # while
	close DECODER;
    } # for
    print "($run) $size translations total\n";
    return $size;
}

sub create_config {
    my %ABBR;
    open(ABBR,"/home/pkoehn/statmt/project/pharaoh/beamdecoder/parameter.cpp");
    while(<ABBR>) {
	next unless /settingAbbr\[\"(.+)\"\]\s*=\s*\"(.+)\";/;
	$ABBR{$2} = $1;
    }
    close(ABBR);

    my %P;
    # parameters specified at the command line
    {
	my $parameter;
	foreach (split(/ /,$___PARAMETERS)) {
	    print "$_ :::\n";
	    if (/^\-([^\d].*)$/) {
		$parameter = $1;
		$parameter = $ABBR{$parameter} if defined($ABBR{$parameter});
		print "is parameter $parameter\n";
	    }
	    else {
		push @{$P{$parameter}},$_;
	    }
	}
    }

    # tuned parameters
    if ($___LAMBDA =~ /lm/) {
	my $l=0;
	foreach (split(/ /,$lambda_ranges)) {
	    my ($name,$values) = split(/:/);
	    $name = $ABBR{$name} if defined($ABBR{$name});
	    foreach my $feature (split(/;/,$values)) {
		push @{$P{$name}},$LAMBDA[$l++];
	    }
	}
    }
    else {
	push @{$P{"weight-d"}},$LAMBDA[0];
	push @{$P{"weight-l"}},$LAMBDA[1];
	for (my $i=2; $i<=$#LAMBDA-1; $i++) {
	    push @{$P{"weight-t"}},$LAMBDA[$i];
	}
	push @{$P{"weight-w"}},$LAMBDA[$#LAMBDA]; 
    }

    # create new pharaoh.ini decoder config file
    open(INI,$P{"config"}[0]);
    delete($P{"config"});
    print "OUT: >$___WORKING_DIR/pharaoh.ini\n";
    open(OUT,">$___WORKING_DIR/pharaoh.ini");
    print OUT "# MERT optimized configuration\n";
    print OUT "# decoder $___DECODER\n";
    print OUT "# dev $___DEV_F\n";
    print OUT "# $run iterations\n";
    print OUT "# finished ".`date`;
    print OUT "# score on dev: $best_random_bleu\n\n";
    my $line = <INI>;
    while(1) {
	last unless $line;

	# skip until hit [parameter]
	if ($line !~ /^\[(.+)\]\s*$/) { 
	    $line = <INI>;
	    print OUT $line if $line =~ /^\#/ || $line =~ /^\s+$/;
	    next;
	}

	# parameter name
	my $parameter = $1;
	$parameter = $ABBR{$parameter} if defined($ABBR{$parameter});
	print OUT "[$parameter]\n";

	# change parameter, if new values
	if (defined($P{$parameter})) {
	    # write new values
	    foreach (@{$P{$parameter}}) {
		print OUT $_."\n";
	    }
	    delete($P{$parameter});
	    # skip until new parameter, only write comments
	    while($line = <INI>) {
		print OUT $line if $line =~ /^\#/ || $line =~ /^\s+$/;
		last if $line =~ /^\[/;
		last unless $line;
	    }
	    next;
	}
	
	# unchanged parameter, write old
	while($line = <INI>) {
	    last if $line =~ /^\[/;
	    print OUT $line;
	}
    }

    foreach my $parameter (keys %P) {
	print OUT "\n[$parameter]\n";
	foreach (@{$P{$parameter}}) {
	    print OUT $_."\n";
	}
    }

    close(INI);
    close(OUT);
}

__DATA__
__C__

#define SMALL 1e-10

void find_threshold_points_c(AV* SCORE, AV* BLEU_IMPACT, int lambda, HV* THRESHOLD, AV* BEST, HV* BLEU_CHANGE, int sentnums, AV* LAMBDA, int verbose)
{
	int i, j, best, translation, sent, right, threshold_hashval, ngram;
	int ival_1, ival_2, ival_3,right_threshold_str_len;
	double best_decline, best_score, score, decline, best_fixed_score, last_threshold, threshold, LAMBDA_lambda;
	double right_fixed_score, right_incline, right_threshold, incline, best_incline, fixed_score;
	double val_1, val_2, val_3;
	SV** tmp, **tmp2;
	AV *SCORE_sent, *SCORE_sent_translation, *SCORE_sent_best, *BLEU_IMPACT_sent, *BLEU_CHANGE_rt_correct, *BLEU_CHANGE_rt_total, *BLEU_IMPACT_sent_r_correct, *BLEU_IMPACT_sent_r_total, *BLEU_IMPACT_sent_best_correct, *BLEU_IMPACT_sent_best_total;
	HV* BLEU_CHANGE_rt, *BLEU_IMPACT_sent_r, *BLEU_IMPACT_sent_best, *hash1, *hash2, *hash3;
	char right_threshold_str[200];

	for (sent=0;sent<sentnums;sent++)
	{
		best_decline = 1e100;
		best_score = -1e100;
		tmp = av_fetch(SCORE, sent, 0);
		SCORE_sent = (AV*)SvRV(*tmp);
		for(translation=0;translation <=av_len(SCORE_sent); translation++)
		{	
			tmp = av_fetch(SCORE_sent, translation, 0);
			SCORE_sent_translation = (AV*)SvRV(*tmp);
			score = 0;
			for(i=0;i<=av_len(LAMBDA);i++)
			{
				tmp = av_fetch(SCORE_sent_translation, i, 0);
				val_1 = SvNV(*tmp);

				tmp2 = av_fetch(LAMBDA, i, 0);
				val_2 = SvNV(*tmp2);

				score += val_1 * val_2;
			}
			tmp = av_fetch(SCORE_sent_translation, lambda, 0);
			decline = SvNV(*tmp);

			if((decline < best_decline) || ((decline == best_decline) && (score > best_score)))
			{
				best_decline = decline;
				best_score = score;
				best = translation;
			}
		}

		av_store(BEST, sent, newSViv(best));

		// find intersection points to the right
		if(verbose >=5)
			fprintf(stderr, "lambda %d, sentence %d:\n\tinitial best: %d\n", lambda, sent, best);

		tmp = av_fetch(LAMBDA, lambda, 0);
		LAMBDA_lambda = SvNV(*tmp);

		tmp = av_fetch(SCORE_sent, best, 0);
		SCORE_sent_best = (AV*)SvRV(*tmp);
		
		tmp = av_fetch(SCORE_sent_best, lambda, 0);

		best_fixed_score = best_score - LAMBDA_lambda * SvNV(*tmp);

		last_threshold = -1e100;

		while(1)
		{
			right_threshold = 1e100;

			for (translation=0;translation<=av_len(SCORE_sent);translation++)
			{
				tmp = av_fetch(SCORE_sent, best, 0);
				SCORE_sent_best = (AV*)SvRV(*tmp);
					
				tmp = av_fetch(SCORE_sent_best, lambda, 0);
				best_incline = SvNV(*tmp);
				
				tmp = av_fetch(SCORE_sent, translation,0);
				SCORE_sent_translation = (AV*)SvRV(*tmp);

				tmp = av_fetch(SCORE_sent_translation, lambda, 0);
				incline = SvNV(*tmp);

				if (incline > best_incline)
				{
					// Compute intersection point
					fixed_score = 0;
					for(i=0;i<=av_len(LAMBDA);i++)
					{
						if(i != lambda)
						{
							tmp = av_fetch(LAMBDA, i, 0);
							tmp2 = av_fetch(SCORE_sent_translation, i, 0);
							
							fixed_score += SvNV(*tmp) * SvNV(*tmp2);
						}
					}

					threshold =  -1 * ((best_fixed_score - fixed_score) / (best_incline - incline));

					// Intersection point must be to the right of the last intersection point
					if (threshold > last_threshold)
					{
						// and must be closest to the left 
						if ((threshold < right_threshold) || 
								//special case, same threshold, smaller incline 
								// may happen at threshold 0
								((threshold == right_threshold) && (incline > right_incline)))
						{
							right = translation;
							right_fixed_score = fixed_score;
							right_threshold = threshold;
							right_incline = incline;
						}
					}
				}
			}
			
			if(right_threshold == 1e100){
				break;
			}

			if(verbose >= 5){
				fprintf(stderr,"\tnext best %d at %.6f ( %.6f ), old best %d\n",right, right_threshold, right_fixed_score+right_threshold*right_incline, best);
			    }

			sprintf(right_threshold_str, "%.14g", right_threshold);
			right_threshold_str_len = strlen(right_threshold_str);
			if(hv_exists(THRESHOLD, right_threshold_str, right_threshold_str_len))
			{
				tmp = hv_fetch(THRESHOLD, right_threshold_str, right_threshold_str_len, 0);
				threshold_hashval = SvIV(*tmp);	
				hv_store(THRESHOLD, right_threshold_str, right_threshold_str_len, newSViv(threshold_hashval+1), 0);
			}
			else
			{
				hv_store(THRESHOLD, right_threshold_str, right_threshold_str_len, newSViv(1), 0);
			}

			if(hv_exists(BLEU_CHANGE, right_threshold_str, right_threshold_str_len))
			{
				tmp = hv_fetch(BLEU_CHANGE, right_threshold_str, right_threshold_str_len, 0);
				BLEU_CHANGE_rt = (HV*)SvRV(*tmp);

				tmp = hv_fetch(BLEU_CHANGE_rt, "CORRECT", 7, 0);
				BLEU_CHANGE_rt_correct = (AV*)SvRV(*tmp);

				tmp = hv_fetch(BLEU_CHANGE_rt, "TOTAL", 5, 0);
				BLEU_CHANGE_rt_total = (AV*)SvRV(*tmp);
			}
			else
			{
				// Use the current value of right_threshold as the key and store a new hash there
				hv_store(BLEU_CHANGE, right_threshold_str, right_threshold_str_len, newRV_noinc((SV*)BLEU_CHANGE_rt = newHV()), 0);

				// create a new array and store it with key "CORRECT" in BLEU_CHANGE_rt
				hv_store(BLEU_CHANGE_rt, "CORRECT", 7, newRV_noinc((SV*)BLEU_CHANGE_rt_correct = newAV()), 0);

				// do the same for the key "TOTAL"
				hv_store(BLEU_CHANGE_rt, "TOTAL", 5, newRV_noinc((SV*)BLEU_CHANGE_rt_total = newAV()), 0);

			}
		
			tmp = av_fetch(BLEU_IMPACT, sent, 0);
			BLEU_IMPACT_sent = (AV*)SvRV(*tmp);		
					
			tmp = av_fetch(BLEU_IMPACT_sent, right, 0);
			BLEU_IMPACT_sent_r = (HV*)SvRV(*tmp);

			tmp = av_fetch(BLEU_IMPACT_sent, best, 0);
			BLEU_IMPACT_sent_best = (HV*)SvRV(*tmp);
				
			tmp = hv_fetch(BLEU_IMPACT_sent_r, "CORRECT", 7, 0);
			BLEU_IMPACT_sent_r_correct = (AV*)SvRV(*tmp);

			tmp = hv_fetch(BLEU_IMPACT_sent_best, "CORRECT", 7, 0);
			BLEU_IMPACT_sent_best_correct = (AV*)SvRV(*tmp);
		
			tmp = hv_fetch(BLEU_IMPACT_sent_r, "TOTAL", 5, 0);
			BLEU_IMPACT_sent_r_total = (AV*)SvRV(*tmp);
			
			tmp = hv_fetch(BLEU_IMPACT_sent_best, "TOTAL", 5, 0);
			BLEU_IMPACT_sent_best_total = (AV*)SvRV(*tmp);
			
			for(ngram=1;ngram<=4;ngram++)
			{
				// Update ${$BLEU_CHANGE}{$right_threshold}{CORRECT}[$ngram]
			
				if(av_exists(BLEU_CHANGE_rt_correct, ngram)) {	
					tmp = av_fetch(BLEU_CHANGE_rt_correct, ngram, 0);
					ival_1 = SvIV(*tmp);
				}
				else {
					ival_1 = 0;
				}

				tmp = av_fetch(BLEU_IMPACT_sent_r_correct, ngram, 0);
				ival_2 = SvIV(*tmp);

				tmp = av_fetch(BLEU_IMPACT_sent_best_correct, ngram, 0);
				ival_3 = SvIV(*tmp);

				av_store(BLEU_CHANGE_rt_correct, ngram, newSViv(ival_1 + ival_2 - ival_3));
				
					
				// Update ${$BLEU_CHANGE}{$right_threshold}{TOTAL}[$ngram]
				
				if(av_exists(BLEU_CHANGE_rt_total, ngram)) {	
					tmp = av_fetch(BLEU_CHANGE_rt_total, ngram, 0);
					ival_1 = SvIV(*tmp);
				}
				else {
					ival_1 = 0;
				}

				tmp = av_fetch(BLEU_IMPACT_sent_r_total, ngram, 0);
				ival_2 = SvIV(*tmp);

				tmp = av_fetch(BLEU_IMPACT_sent_best_total, ngram, 0);
				ival_3 = SvIV(*tmp);

				av_store(BLEU_CHANGE_rt_total, ngram, newSViv(ival_1 + ival_2 - ival_3));
			}

			// Update ${$BLEU_CHANGE}{$right_threshold}{LENGTH}
			tmp = hv_fetch(BLEU_IMPACT_sent_r, "LENGTH", 6, 0);
                        ival_2 = SvIV(*tmp);

                        tmp = hv_fetch(BLEU_IMPACT_sent_best, "LENGTH", 6, 0);  
                        ival_3 = SvIV(*tmp);
		
			if(hv_exists(BLEU_CHANGE_rt, "LENGTH", 6)) {
				tmp = hv_fetch(BLEU_CHANGE_rt, "LENGTH", 6, 0);
				ival_1 = SvIV(*tmp);
			}
			else {
				ival_1 = 0;
			}

			hv_store(BLEU_CHANGE_rt, "LENGTH", 6, newSViv(ival_1 + ival_2 - ival_3), 0);

			best = right;
			best_fixed_score = right_fixed_score;
			last_threshold = right_threshold;
		}
	}
}
		
