#!/usr/bin/perl -w

use strict;
use Getopt::Long "GetOptions";

# Train Phrase Model
# (c) 2002-2006 Philipp Koehn
# Train a phrase model from a parallel corpus
# Version released for WMT 2006 Shared Task

# these variables may have to be adapted to local paths
my $BINDIR = ".";
my $GIZA = "$BINDIR/GIZA++";
my $SNT2COOC = "$BINDIR/snt2cooc.out"; 
my $PHRASE_EXTRACT = "$BINDIR/phrase-extract";
my $PHRASE_SCORE = "$BINDIR/phrase-score";
my $MKCLS = "$BINDIR/mkcls";
my $ZCAT = "zcat";
my $BZCAT = "bzcat";
# -----------------------------------------------------
$ENV{"LC_ALL"} = "C";

my($_ROOT_DIR,$_CORPUS_DIR,$_GIZA_E2F,$_GIZA_F2E,$_MODEL_DIR,$_CORPUS,$_FIRST_STEP,$_LAST_STEP,$_F,$_E,$_MAX_PHRASE_LENGTH,$_LEXICAL_DIR,$_NO_LEXICAL_WEIGHTING,$_VERBOSE,$_ALIGNMENT,$_LM,$_EXTRACT_FILE,$_GIZA_OPTION,$_HELP,$_PARTS,$_DIRECTION,$_ONLY_PRINT_GIZA,$_REORDERING,$_REORDERING_SMOOTH,$_STEM_F,$_STEM_E,$_DEBUG);


$_HELP = 1
    unless &GetOptions('root-dir=s' => \$_ROOT_DIR,
		       'corpus-dir=s' => \$_CORPUS_DIR,
		       'corpus=s' => \$_CORPUS,
		       'f=s' => \$_F,
		       'e=s' => \$_E,
		       'giza-e2f=s' => \$_GIZA_E2F,
		       'giza-f2e=s' => \$_GIZA_F2E,
		       'max-phrase-length=i' => \$_MAX_PHRASE_LENGTH,
		       'lexical-dir=s' => \$_LEXICAL_DIR,
		       'no-lexical-weighting' => \$_NO_LEXICAL_WEIGHTING,
		       'model-dir=s' => \$_MODEL_DIR,
		       'extract-file=s' => \$_EXTRACT_FILE,
		       'alignment=s' => \$_ALIGNMENT,
		       'verbose' => \$_VERBOSE,
		       'first-step=i' => \$_FIRST_STEP,
		       'last-step=i' => \$_LAST_STEP,
		       'giza-option=s' => \$_GIZA_OPTION,
		       'lm=s' => \$_LM,
		       'help' => \$_HELP,
		       'parts=i' => \$_PARTS,
		       'direction=i' => \$_DIRECTION,
		       'only-print-giza' => \$_ONLY_PRINT_GIZA,
		       'reordering=s' => \$_REORDERING,
		       'reordering-smooth=s' => \$_REORDERING_SMOOTH,
		       'stem-f=i' => \$_STEM_F,
		       'stem-e=i' => \$_STEM_E,
		       'debug' => \$_DEBUG);

if ($_HELP) {
    print "Train Phrase Model

Steps: (--first-step to --last-step)
(1) prepare corpus
(2) run GIZA
(3) align words
(4) learn lexical translation
(5) extract phrases
(6) score phrases
(7) learn reordering model
(8) create decoder config file

For more, please check manual or contact koehn\@csail.mit.edu\n";
  exit(1);
}

# set varibles to defaults or from options
die("use --root-dir to specify root directory") unless $_ROOT_DIR;
my $___ROOT_DIR = $_ROOT_DIR;
my $___CORPUS_DIR  = $___ROOT_DIR."/corpus";
$___CORPUS_DIR = $_CORPUS_DIR if $_CORPUS_DIR;
die("use --corpus to specify corpus") unless $_CORPUS || ($_FIRST_STEP && $_FIRST_STEP>1);
my $___CORPUS      = $_CORPUS;

# foreign/English language extension
die("use --f to specify foreign language") unless $_F;
die("use --e to specify English language") unless $_E;
my $___F = $_F;
my $___E = $_E;

# vocabulary files in corpus dir
my $___VCB_E = $___CORPUS_DIR."/".$___E.".vcb";
my $___VCB_F = $___CORPUS_DIR."/".$___F.".vcb";

# GIZA generated files
my $___GIZA = $___ROOT_DIR."/giza";
my $___GIZA_E2F = $___GIZA.".".$___E."-".$___F;
my $___GIZA_F2E = $___GIZA.".".$___F."-".$___E;
$___GIZA_E2F = $_GIZA_E2F if $_GIZA_E2F;
$___GIZA_F2E = $_GIZA_F2E if $_GIZA_F2E;
my $___GIZA_OPTION = "";
$___GIZA_OPTION = $_GIZA_OPTION if $_GIZA_OPTION;

# alignment heuristic
my $___ALIGNMENT = "grow-diag-final";
$___ALIGNMENT = $_ALIGNMENT if $_ALIGNMENT;
my $___NOTE_ALIGNMENT_DROPS = 1;

# model dir and extract file
my $___MODEL_DIR = $___ROOT_DIR."/model";
$___MODEL_DIR = $_MODEL_DIR if $_MODEL_DIR;
my $___EXTRACT_FILE = $___MODEL_DIR."/extract";
$___EXTRACT_FILE = $_EXTRACT_FILE if $_EXTRACT_FILE;


my $___MAX_PHRASE_LENGTH = 7;
my $___LEXICAL_WEIGHTING = 1;
my $___LEXICAL_DIR = $___MODEL_DIR;
$___MAX_PHRASE_LENGTH = $_MAX_PHRASE_LENGTH if $_MAX_PHRASE_LENGTH;
$___LEXICAL_WEIGHTING = 0 if $_NO_LEXICAL_WEIGHTING;
$___LEXICAL_DIR = $_LEXICAL_DIR if $_LEXICAL_DIR;

my $___VERBOSE = 0;
my $___FIRST_STEP = 1;
my $___LAST_STEP = 8;
$___VERBOSE = $_VERBOSE if $_VERBOSE;
$___FIRST_STEP = $_FIRST_STEP if $_FIRST_STEP;
$___LAST_STEP =  $_LAST_STEP  if $_LAST_STEP;

my $___STEM_F = 0;
$___STEM_F = $_STEM_F if $_STEM_F;
my $___STEM_E = 0;
$___STEM_E = $_STEM_E if $_STEM_E;

my $___LM = "native.lm";
$___LM = $_LM if $_LM;

my $___PARTS = 1;
$___PARTS = $_PARTS if $_PARTS;

my $___DIRECTION = 0;
$___DIRECTION = $_DIRECTION if $_DIRECTION;

my $___ONLY_PRINT_GIZA = 0;
$___ONLY_PRINT_GIZA = 1 if $_ONLY_PRINT_GIZA;

my $___REORDERING = "distance";
$___REORDERING = $_REORDERING if $_REORDERING;
my $___REORDERING_SMOOTH = 0.5;
$___REORDERING_SMOOTH = $_REORDERING_SMOOTH if $_REORDERING_SMOOTH;
my %REORDERING_MODEL;
foreach my $r (split(/,/,$___REORDERING)) {
    if (!( $r eq "orientation-f" ||
         $r eq "orientation-fe" ||
         $r eq "orientation-bidirectional-f" ||
         $r eq "orientation-bidirectional-fe" ||
         $r eq "monotonicity-f" ||
         $r eq "monotonicity-fe" ||
         $r eq "monotonicity-bidirectional-f" ||
         $r eq "monotonicity-bidirectional-fe" ||
         $r eq "distance")) {
       print STDERR "unknown reordering type: $r";
       exit(1);
    }
    $REORDERING_MODEL{$r}++;
    if ($r =~ /-f$/) { $REORDERING_MODEL{"f"}++; }
    if ($r =~ /-fe$/) { $REORDERING_MODEL{"fe"}++; }
}
my ($mono_previous_f,$swap_previous_f,$other_previous_f);
my ($mono_previous_fe,$swap_previous_fe,$other_previous_fe);
my ($mono_following_f,$swap_following_f,$other_following_f);
my ($mono_following_fe,$swap_following_fe,$other_following_fe);
my ($f_current,$e_current);

my $alignment_id;

### MAIN

&prepare()         if $___FIRST_STEP==1;
&run_giza()        if $___FIRST_STEP<=2 && $___LAST_STEP>=2;
&word_align()      if $___FIRST_STEP<=3 && $___LAST_STEP>=3;
&get_lexical()     if $___FIRST_STEP<=4 && $___LAST_STEP>=4;
&extract_phrase()  if $___FIRST_STEP<=5 && $___LAST_STEP>=5;
&score_phrase()    if $___FIRST_STEP<=6 && $___LAST_STEP>=6;
&get_reordering()  if $___FIRST_STEP<=7 && $___LAST_STEP>=7;
&create_ini()      if                      $___LAST_STEP==8;

### (1) PREPARE CORPUS

sub prepare {
    print STDERR "(1) preparing corpus @ ".`date`;
    `mkdir -p $___CORPUS_DIR`;

    if ($___STEM_F || $___STEM_E) {
	if ($___STEM_F) { &stem($___CORPUS,$___F,$___STEM_F); }
	else {`ln -s ${___CORPUS}.${___F} ${___CORPUS}.stem.${___F}`; }
	if ($___STEM_E) { &stem($___CORPUS,$___E,$___STEM_E); }
	else {`ln -s ${___CORPUS}.${___E} ${___CORPUS}.stem.${___E}`; }
	$___CORPUS .= ".stem";
    }
    
    &make_classes($___CORPUS.".".$___F,$___VCB_F.".classes");
    &make_classes($___CORPUS.".".$___E,$___VCB_E.".classes");
    
    my $VCB_F = &get_vocabulary($___F,$___VCB_F);
    my $VCB_E = &get_vocabulary($___E,$___VCB_E);
    
    &numberize_txt_file($VCB_F,$___CORPUS.".".$___F,
			$VCB_E,$___CORPUS.".".$___E,
			$___CORPUS_DIR."/$___F-$___E-int-train.snt");
    
    &numberize_txt_file($VCB_E,$___CORPUS.".".$___E,
			$VCB_F,$___CORPUS.".".$___F,
			$___CORPUS_DIR."/$___E-$___F-int-train.snt");
}

sub stem {
    my ($corpus,$language,$length) = @_;
    print STDERR "stemming $corpus.$language to length $length\n";
    open(FULL,"$corpus.$language");
    open(STEM,">$corpus.stem.$language");
    while(<FULL>) {
	chop;
	my $first = 1;
	foreach (split) { 
	    print STEM " " unless $first;
	    print STEM substr($_,0,$length);
	    $first = 0;
	}
	print STEM "\n";
    }
    close(STEM);
    close(FULL);
}

sub make_classes {
    my ($corpus,$classes) = @_;
    my $cmd = "$MKCLS -c50 -n2 -p$corpus -V$classes opt";
    print STDERR "(1.1) running mkcls  @ ".`date`."$cmd\n";
    `$cmd`;
}

sub get_vocabulary {
    return unless $___LEXICAL_WEIGHTING;
    my($language,$vcb) = @_;
    print STDERR "(1.2) creating vcb file $vcb @ ".`date`;
    
    my %WORD;
    open(TXT,"cat $___CORPUS.$language|");
    while(<TXT>) {
	chop;
	foreach (split) { $WORD{$_}++; }
    }
    close(TXT);
    
    my @NUM;
    foreach my $word (keys %WORD) {
	my $vcb_with_number = sprintf("%07d %s",$WORD{$word},$word);
	push @NUM,$vcb_with_number;
    }
    
    my %VCB;
    open(VCB,">$vcb");
    print VCB "1\tUNK\t0\n";
    my $id=2;
    foreach (reverse sort @NUM) {
	my($count,$word) = split;
	printf VCB "%d\t%s\t%d\n",$id,$word,$count;
	$VCB{$word} = $id;
	$id++;
    }
    close(VCB);
    
    return \%VCB;
}

sub numberize_txt_file {
    my ($VCB_DE,$in_de,$VCB_EN,$in_en,$out) = @_;
    my %OUT;
    print STDERR "(1.3) numberizing corpus $out @ ".`date`;
    open(IN_DE,$in_de);
    open(IN_EN,$in_en);
    open(OUT,">$out");
    while(my $de = <IN_DE>) {
	my $en = <IN_EN>;
	print OUT "1\n";
	print OUT &numberize_line($VCB_EN,$en);
	print OUT &numberize_line($VCB_DE,$de);
    }
    close(IN_DE);
    close(IN_EN);
    close(OUT);
}

sub numberize_line {
    my ($VCB,$txt) = @_;
    chomp($txt);
    my $out = "";
    my $not_first = 0;
    foreach (split(/ /,$txt)) { 
	next if $_ eq '';
	$out .= " " if $not_first++;
	print STDERR "Unknown word '$_'\n" unless defined($$VCB{$_});
	$out .= $$VCB{$_};
    }
    return $out."\n";
}

### (2) RUN GIZA

sub run_giza {
    return &run_giza_on_parts if $___PARTS>1;

    print STDERR "(2) running giza @ ".`date`;
    &run_single_giza($___GIZA_F2E,$___E,$___F,
		     $___VCB_E,$___VCB_F,
		     $___CORPUS_DIR."/$___F-$___E-int-train.snt")
	unless $___DIRECTION == 2;
    &run_single_giza($___GIZA_E2F,$___F,$___E,
		     $___VCB_F,$___VCB_E,
		     $___CORPUS_DIR."/$___E-$___F-int-train.snt")
	unless $___DIRECTION == 1;
}

sub run_giza_on_parts {
    print STDERR "(2) running giza on $___PARTS cooc parts @ ".`date`;
    my $size = `cat $___CORPUS_DIR/$___F-$___E-int-train.snt | wc -l`;
    
    &run_single_giza_on_parts($___GIZA_F2E,$___E,$___F,
			      $___VCB_E,$___VCB_F,
			      $___CORPUS_DIR."/$___F-$___E-int-train.snt",$size)
   	unless $___DIRECTION == 2;
 
    &run_single_giza_on_parts($___GIZA_E2F,$___F,$___E,
			      $___VCB_F,$___VCB_E,
			      $___CORPUS_DIR."/$___E-$___F-int-train.snt",$size)
   	unless $___DIRECTION == 1;
}

sub run_single_giza_on_parts {
    my($dir,$e,$f,$vcb_e,$vcb_f,$train,$size) = @_;
    
    my $part = 0;

    # break up training data into parts
    open(SNT,$train);
    { 
	my $i=0;
	while(<SNT>) {
	    $i++;
	    if ($i%3==1 && $part < ($___PARTS*$i)/$size && $part<$___PARTS) {
		close(PART) if $part;
		$part++;
		`mkdir -p $___CORPUS_DIR/part$part`;
		open(PART,">$___CORPUS_DIR/part$part/$f-$e-int-train.snt");
	    }
	    print PART $_;
	}
    }
    close(PART);
    close(SNT);

    # run snt2cooc in parts
    for(my $i=1;$i<=$___PARTS;$i++) {
	&run_single_snt2cooc("$dir/part$i",$e,$f,$vcb_e,$vcb_f,"$___CORPUS_DIR/part$i/$f-$e-int-train.snt");
    }

    # merge parts
    open(COOC,">$dir/$f-$e.cooc");
    my(@PF,@CURRENT);
    for(my $i=1;$i<=$___PARTS;$i++) {
	open($PF[$i],"$dir/part$i/$f-$e.cooc");
	my $pf = $PF[$i];
	$CURRENT[$i] = <$pf>;
	chop($CURRENT[$i]) if $CURRENT[$i];
    }

    while(1) {
	my ($min1,$min2) = (1e20,1e20);
	for(my $i=1;$i<=$___PARTS;$i++) {
	    next unless $CURRENT[$i];
	    my ($w1,$w2) = split(/ /,$CURRENT[$i]);
	    if ($w1 < $min1 || ($w1 == $min1 && $w2 < $min2)) {
		$min1 = $w1;
		$min2 = $w2;
	    }
	}
	last if $min1 == 1e20;
	print COOC "$min1 $min2\n";
	for(my $i=1;$i<=$___PARTS;$i++) {
	    next unless $CURRENT[$i];
	    my ($w1,$w2) = split(/ /,$CURRENT[$i]);
	    if ($w1 == $min1 && $w2 == $min2) {
		my $pf = $PF[$i];
		$CURRENT[$i] = <$pf>;
		chop($CURRENT[$i]) if $CURRENT[$i];
	    }
	}	
    }
    for(my $i=1;$i<=$___PARTS;$i++) {
	close($PF[$i]);
    }
    close(COOC);

    # run giza
    &run_single_giza($dir,$e,$f,$vcb_e,$vcb_f,$train);
}

sub run_single_giza {
    my($dir,$e,$f,$vcb_e,$vcb_f,$train) = @_;

    my %GizaDefaultOptions = 
	(p0 => .999 ,
	 m1 => 5 , 
	 m2 => 0 , 
	 m3 => 3 , 
	 m4 => 3 , 
	 o => "giza" ,
	 nodumps => 1 ,
	 onlyaldumps => 1 ,
	 nsmooth => 4 , 
	 model4smoothfactor => 0.4 ,
	 t => $vcb_f,
         s => $vcb_e,
	 c => $train,
	 CoocurrenceFile => "$dir/$f-$e.cooc",
	 o => "$dir/$f-$e");

    if ($___GIZA_OPTION) {
	foreach (split(/[ ,]+/,$___GIZA_OPTION)) {
	    my ($option,$value) = split(/=/,$_,2);
	    $GizaDefaultOptions{$option} = $value;
	}
    }

    my $GizaOptions;
    foreach my $option (sort keys %GizaDefaultOptions){
	my $value = $GizaDefaultOptions{$option} ;
	$GizaOptions .= " -$option $value" ;
    }
    
    &run_single_snt2cooc($dir,$e,$f,$vcb_e,$vcb_f,$train) if $___PARTS == 1;

    print STDERR "(2.1b) running giza $f-$e @ ".`date`."$GIZA $GizaOptions\n";
    print "$GIZA $GizaOptions\n";
    return if  $___ONLY_PRINT_GIZA;
    system("$GIZA $GizaOptions");
    `rm -f $dir/$f-$e.A3.final.gz`;
    `gzip $dir/$f-$e.A3.final`;
}

sub run_single_snt2cooc {
    my($dir,$e,$f,$vcb_e,$vcb_f,$train) = @_;
    print STDERR "(2.1a) running snt2cooc $f-$e @ ".`date`."\n";
    `mkdir -p $dir`;
    print "$SNT2COOC $vcb_e $vcb_f $train > $dir/$f-$e.cooc\n";
    system("$SNT2COOC $vcb_e $vcb_f $train > $dir/$f-$e.cooc");
}

### (3) CREATE WORD ALIGNMENT FROM GIZA ALIGNMENTS

sub word_align {
    print STDERR "(3) generate word alignment @ ".`date`;
    my (%WORD_TRANSLATION,%TOTAL_FOREIGN,%TOTAL_ENGLISH);
    if (-e "$___GIZA_F2E/$___F-$___E.A3.final.bz2") {
	open(ALIGNMENT,    "$BZCAT $___GIZA_F2E/$___F-$___E.A3.final.bz2|");
	open(ALIGNMENT_INV,"$BZCAT $___GIZA_E2F/$___E-$___F.A3.final.bz2|");    
    }
    else {
	open(ALIGNMENT,    "$ZCAT $___GIZA_F2E/$___F-$___E.A3.final.gz|");
	open(ALIGNMENT_INV,"$ZCAT $___GIZA_E2F/$___E-$___F.A3.final.gz|");
    }
    `mkdir -p $___MODEL_DIR` unless -e $___MODEL_DIR;
    open(E,">$___MODEL_DIR/aligned.$___E");
    open(F,">$___MODEL_DIR/aligned.$___F");
    open(A,">$___MODEL_DIR/aligned.$___ALIGNMENT");
    for(my $id=0;;$id++) {    
	if (($id % 100) == 0) { print STDERR "."; }
	my ($status,$ALIGNMENT,$ENGLISH,$FOREIGN) 
	    = &get_alignment($id,\*ALIGNMENT,\*ALIGNMENT_INV);
	last if $status == -1;
	if ($status == 0 && $___NOTE_ALIGNMENT_DROPS) {
	    print E "\n";
	    print F "\n";
	    print A "\n";
	}
	next if $status == 0;
	
	for(my $fi=0;$fi<=$#$FOREIGN;$fi++) {
	    print F " " if $fi;
	    print F $$FOREIGN[$fi];
	}
	print F "\n";
	
	for(my $ei=0;$ei<=$#$ENGLISH;$ei++) {
	    print E " " if $ei;
	    print E $$ENGLISH[$ei];
	}
	print E "\n";
	
	my $count=0;
	for(my $fi=0;$fi<=$#$FOREIGN;$fi++) {     
	    next if !defined($$ALIGNMENT[$fi]);
	    for(my $ei=0;$ei<=$#$ENGLISH;$ei++) {
		next unless $$ALIGNMENT[$fi][$ei] == 2;
		print A " " if $count++;
		print A "$fi-$ei";
	    }
	}
	print A "\n";
    }
    close(ALIGNMENT_INV);
    close(ALIGNMENT);
    print STDERR "\n";

    if ($_STEM_F || $_STEM_E) {
	$___CORPUS =~ s/\.stem$// if $___FIRST_STEP == 1;
	my $words_before_e = `cat $___CORPUS.$___E | wc -w`;
	my $words_after_e  = `cat $___MODEL_DIR/aligned.$___E | wc -w`;
	my $words_before_f = `cat $___CORPUS.$___F | wc -w`;
	my $words_after_f  = `cat $___MODEL_DIR/aligned.$___F | wc -w`;
	if ($words_before_e != $words_after_e ||
	    $words_before_f != $words_after_f) {
	    print STDERR "GIZA++ reduced the number of words! e:$words_before_e->$words_after_e f:$words_before_f->$words_after_f\n";
	    exit(1);
	}
	`mv $___MODEL_DIR/aligned.$___E $___MODEL_DIR/aligned.$___E.bak` if $_DEBUG;
	`mv $___MODEL_DIR/aligned.$___F $___MODEL_DIR/aligned.$___F.bak` if $_DEBUG;
	`cp $___CORPUS.$___E $___MODEL_DIR/aligned.$___E`;
	`cp $___CORPUS.$___F $___MODEL_DIR/aligned.$___F`;
    }
}

sub get_alignment {
    my ($id,$AFILE,$AFILE_INV) = @_;
    my $ALIGNMENT;
    
    # read alignments
    my ($ok, $F2E,$ENGLISH, $FOREIGN,$debug)  = &load_alignment_giza($AFILE,0);
    return -1 unless $ok;
    my ($ok2,$E2F,$FOREIGN2,$ENGLISH2,$debug2) = &load_alignment_giza($AFILE_INV,1);
    return -1 unless $ok2;
    
    # sanity checks
    if ($#$ENGLISH != $#$ENGLISH2 || $#$FOREIGN != $#$FOREIGN2) {
	print STDERR "line $id: different number of words: ($#$ENGLISH != $#$ENGLISH2 || $#$FOREIGN != $#$FOREIGN2), skipping\n"; # .$debug.$debug2;
	return (0);
    }
    
    if ($___ALIGNMENT eq 'union') {
	$ALIGNMENT = &union($F2E,$E2F);
	print "UNION:\n" if $___VERBOSE >= 2;
	&print_alignment($ALIGNMENT) if $___VERBOSE >= 2;
    }
    else {
	print "INTERSECT:\n" if $___VERBOSE >= 2;
	$ALIGNMENT = &intersect($F2E,$E2F);
	&print_alignment($ALIGNMENT) if $___VERBOSE >= 2;
	if ($___ALIGNMENT eq 'e2f' || $___ALIGNMENT eq 'f2e') {
	    print "RELY ON MONO:\n" if $___VERBOSE >= 2;
	    my $one = 1.1;
	    $one = 1.2 if $___ALIGNMENT eq 'e2f';
	    for(my $ei=0;$ei<=$#{$$E2F[0]};$ei++) {
		for(my $fi=0;$fi<=$#$E2F;$fi++) {
		    $$ALIGNMENT[$fi][$ei] = 2 if $$ALIGNMENT[$fi][$ei] == $one;
		}
	    }
	    &print_alignment($ALIGNMENT) if $___VERBOSE >= 2;
	}
	elsif ($___ALIGNMENT =~ /grow/) {
	    print "GROW:\n" if $___VERBOSE >= 2;
	    if ($___ALIGNMENT =~ /grow12/) {
		while(&grow($ALIGNMENT,1.1,$___ALIGNMENT =~ /diag/)) {};
		while(&grow($ALIGNMENT,1.2,$___ALIGNMENT =~ /diag/)) {};
	    }
	    elsif ($___ALIGNMENT =~ /grow21/) {
		while(&grow($ALIGNMENT,1.2,$___ALIGNMENT =~ /diag/)) {};
		while(&grow($ALIGNMENT,1.1,$___ALIGNMENT =~ /diag/)) {};
	    }
	    else {
		while(&grow($ALIGNMENT,1,  $___ALIGNMENT =~ /diag/)) {};
	    }
	    &print_alignment($ALIGNMENT) if $___VERBOSE >= 2;
	    if ($___ALIGNMENT =~ /final/) {
		print "FINAL:\n" if $___VERBOSE >= 2;
		&grow_final($ALIGNMENT,1.1,$___ALIGNMENT =~ /final-and/) 
		    if $___ALIGNMENT =~ /12/;
		&grow_final($ALIGNMENT,1.2,$___ALIGNMENT =~ /final-and/);
		&grow_final($ALIGNMENT,1.1,$___ALIGNMENT =~ /final-and/) 
		    unless $___ALIGNMENT =~ /12/;
		&print_alignment($ALIGNMENT) if $___VERBOSE >= 2;
	    }
	}
    }
    return (1,$ALIGNMENT,$ENGLISH,$FOREIGN);
}

sub load_alignment_giza {
    my($FP,$inv_flag) = @_;
    
    my $dummy   = <$FP>;
    return 0 unless $dummy;
    
    # foreign line
    my $foreign = <$FP>;
    chop($foreign);
    my @F = split(/ /,$foreign);
    
    # english line
    my $english_with_alignment = <$FP>;
    print $english_with_alignment if $___VERBOSE >= 2;
    my @ETOK = split(/\}\)/,$english_with_alignment);
    
    # extract alignment
    my @A = ();
    my @E = ();
    for(my $e=1;$e<$#ETOK;$e++) {
	my @ALIGN = split(/ /,$ETOK[$e]);
	push @E,$ALIGN[1];
	# init with 0
	for(my $f=0;$f<=$#F;$f++) {
	    $A[$f][$e-1] = 0 if !$inv_flag;
	    $A[$e-1][$f] = 0 if  $inv_flag;      
	}
	# scanning through 'the ({ 1 12'
	for(my $a=3; $a<=$#ALIGN; $a++) {
	    $A[$ALIGN[$a]-1][$e-1] = 1 if !$inv_flag;
	    $A[$e-1][$ALIGN[$a]-1] = 1 if  $inv_flag;
	}
    }
    
    return (1,\@A,\@E,\@F,$english_with_alignment.$foreign."/n");
}

sub intersect {
    my($F2E,$E2F) = @_;
    my @A = ();
    for(my $ei=0;$ei<=$#{$$E2F[0]};$ei++) {
	for(my $fi=0;$fi<=$#$E2F;$fi++) {
	    if ($$F2E[$fi][$ei] && $$E2F[$fi][$ei]) {
		$A[$fi][$ei] = 2;
	    }
	    elsif ($$F2E[$fi][$ei]) {
		$A[$fi][$ei] = 1.1;
	    }
	    elsif ($$E2F[$fi][$ei]) {
		$A[$fi][$ei] = 1.2;
	    }
	    else {
		$A[$fi][$ei] = 0;
	    }
	}
    }
    return \@A;
}

sub union {
    my($F2E,$E2F) = @_;
    my @A = ();
    for(my $ei=0;$ei<=$#{$$E2F[0]};$ei++) {
	for(my $fi=0;$fi<=$#$E2F;$fi++) {
	    $A[$fi][$ei] = ($$F2E[$fi][$ei] || $$E2F[$fi][$ei]) ? 2 : 0;
	}
    }
    return \@A;
}

sub grow {
    my($A,$one,$diagonal) = @_;
    my @OFFSET;
    { my @P = (-1, 0); push @OFFSET,\@P; }
    { my @P = ( 0,-1); push @OFFSET,\@P; }
    { my @P = ( 1, 0); push @OFFSET,\@P; }
    { my @P = ( 0, 1); push @OFFSET,\@P; }
    if ($diagonal) {
	{ my @P = (-1,-1); push @OFFSET,\@P; }
	{ my @P = (-1, 1); push @OFFSET,\@P; }
	{ my @P = ( 1,-1); push @OFFSET,\@P; }
	{ my @P = ( 1, 1); push @OFFSET,\@P; }
    }
    my $added=0;
    $one = 1 unless $one;
    my ($FOREIGN_ALIGNED,$ENGLISH_ALIGNED) = &count_aligned($A);
    for(my $ei=0;$ei<=$#{$$A[0]};$ei++) {
	for(my $fi=0;$fi<=$#$A;$fi++) {
	    next unless $$A[$fi][$ei] == 2;
	    foreach my $P (@OFFSET) {
		my ($offset_e,$offset_f) = @{$P};
		next if $fi+$offset_f<0 || $fi+$offset_f>$#$A;
		next if $ei+$offset_e<0 || $ei+$offset_e>$#{$$A[0]};
		next unless (($one==1 && int($$A[$fi+$offset_f][$ei+$offset_e]) == 1)
			     || $$A[$fi+$offset_f][$ei+$offset_e] == $one);
		next if ($$FOREIGN_ALIGNED{$fi+$offset_f} && 
			 $$ENGLISH_ALIGNED{$ei+$offset_e});
		$$A[$fi+$offset_f][$ei+$offset_e] = 2;
		$$FOREIGN_ALIGNED{$fi+$offset_f}++;
		$$ENGLISH_ALIGNED{$ei+$offset_e}++;
		$added++;
	    }
	}
    }
    return $added;
}

sub grow_final {
    my($A,$one,$and) = @_;
    my $added = 0;
    my ($FOREIGN_ALIGNED,$ENGLISH_ALIGNED) = &count_aligned($A);
    for(my $ei=0;$ei<=$#{$$A[0]};$ei++) {
	for(my $fi=0;$fi<=$#$A;$fi++) {
	    next unless $$A[$fi][$ei] == $one;
	    next if (!$and && $$FOREIGN_ALIGNED{$fi} && $$ENGLISH_ALIGNED{$ei});
	    next if ($and && ($$FOREIGN_ALIGNED{$fi} || $$ENGLISH_ALIGNED{$ei}));
	    $$A[$fi][$ei] = 2;
	    $$FOREIGN_ALIGNED{$fi}++;
	    $$ENGLISH_ALIGNED{$ei}++;
	}
    }
    return $added;
}

sub print_alignment {
    my($A) = @_;
    for(my $ei=0;$ei<=$#{$$A[0]};$ei++) {
	for(my $fi=0;$fi<=$#$A;$fi++) {
	    if($$A[$fi][$ei] == 2) {
		print "#";
	    }
	    elsif ($$A[$fi][$ei] == 1.1) {
		print "-";
	    }
	    elsif ($$A[$fi][$ei] == 1.2) {
		print "|";
	    }
	    else {
		print ".";
	    }
	}
	print "\n";
    }
}

sub count_aligned {
    my ($A) = @_;
    my (%FOREIGN_ALIGNED,%ENGLISH_ALIGNED);
    for(my $ei=0;$ei<=$#{$$A[0]};$ei++) {
	for(my $fi=0;$fi<=$#$A;$fi++) {
	    next unless $$A[$fi][$ei] == 2;
	    $FOREIGN_ALIGNED{$fi}++;
	    $ENGLISH_ALIGNED{$ei}++;
	}
    }
    return (\%FOREIGN_ALIGNED,\%ENGLISH_ALIGNED);
}

sub open_alignment {
    open(E,"$___MODEL_DIR/aligned.$___E");
    open(F,"$___MODEL_DIR/aligned.$___F");
    open(A,"$___MODEL_DIR/aligned.$___ALIGNMENT");
    $alignment_id=0;
}

sub read_alignment {
    my ($ALIGNMENT,$FOREIGN,$ENGLISH) = @_;
    if (($alignment_id++ % 1000) == 0) { print STDERR "!"; }
    my $e = <E>;
    return 0 unless $e;
    chomp($e); 
    @{$ENGLISH} = split(/ /,$e);
    my $f = <F>; chomp($f); 
    @{$FOREIGN} = split(/ /,$f);
    my $a = <A>; chomp($a); 
    return -1 if $e eq '';
    for(my $fi=0;$fi<=$#$FOREIGN;$fi++) {
	for(my $ei=0;$ei<=$#$ENGLISH;$ei++) {
	    $$ALIGNMENT[$fi][$ei]=0;
	}
    }
    foreach (split(/ /,$a)) {
	my ($fi,$ei) = split(/\-/);
	$$ALIGNMENT[$fi][$ei] = 2;
    }
    return 1;
}

sub close_alignment {
    print STDERR "\n";
    close(A);
    close(F);
    close(E);
}

### (4) BUILDING LEXICAL TRANSLATION TABLE

sub get_lexical {
    print STDERR "(4) generate lexical translation table @ ".`date`;
    my (%WORD_TRANSLATION,%TOTAL_FOREIGN,%TOTAL_ENGLISH);
    &open_alignment();
    while(1) {
	my (@ALIGNMENT,@FOREIGN,@ENGLISH);
	last unless &read_alignment(\@ALIGNMENT,\@FOREIGN,\@ENGLISH);
	next if scalar(@ENGLISH) == 0;
	&find_aligned_words(\@ALIGNMENT,\@ENGLISH,\@FOREIGN,
			    \%WORD_TRANSLATION,\%TOTAL_FOREIGN,\%TOTAL_ENGLISH);
    }
    &close_alignment();
    &save_word_translation(\%WORD_TRANSLATION,\%TOTAL_FOREIGN,\%TOTAL_ENGLISH);
    print STDERR "\n";
}

sub find_aligned_words {
    my ($ALIGNMENT,$ENGLISH,$FOREIGN,
	$WORD_TRANSLATION,$TOTAL_FOREIGN,$TOTAL_ENGLISH) = @_;
    my ($FOREIGN_ALIGNED,$ENGLISH_ALIGNED) = &count_aligned($ALIGNMENT);
    for(my $ei=0;$ei<=$#$ENGLISH;$ei++) {
	next unless (!defined($$ENGLISH_ALIGNED{$ei}) 
		     || $$ENGLISH_ALIGNED{$ei} == 0);
	$$WORD_TRANSLATION{"NULL"}{$$ENGLISH[$ei]}++;
	$$TOTAL_ENGLISH{$$ENGLISH[$ei]}++;
	$$TOTAL_FOREIGN{"NULL"}++;
    }
    for(my $fi=0;$fi<=$#$FOREIGN;$fi++) {
	if (!defined($$FOREIGN_ALIGNED{$fi}) || $$FOREIGN_ALIGNED{$fi} == 0) {
	    $$WORD_TRANSLATION{$$FOREIGN[$fi]}{"NULL"}++;
	    $$TOTAL_FOREIGN{$$FOREIGN[$fi]}++;
	    $$TOTAL_ENGLISH{"NULL"}++;
	}
	else {
	    for(my $ei=0;$ei<=$#$ENGLISH;$ei++) {
		next unless $$ALIGNMENT[$fi][$ei] == 2;
		$$WORD_TRANSLATION{$$FOREIGN[$fi]}{$$ENGLISH[$ei]}++;
		$$TOTAL_FOREIGN{$$FOREIGN[$fi]}++;
		$$TOTAL_ENGLISH{$$ENGLISH[$ei]}++;
	    }
	}
    }
}

sub save_word_translation {
    my ($WORD_TRANSLATION,$TOTAL_FOREIGN,$TOTAL_ENGLISH) = @_;
    `mkdir -p $___LEXICAL_DIR`;
    open(F2E,">$___LEXICAL_DIR/lex.f2n");
    open(E2F,">$___LEXICAL_DIR/lex.n2f");
    foreach my $f (keys %{$WORD_TRANSLATION}) {
	foreach my $e (keys %{$$WORD_TRANSLATION{$f}}) {
	    printf F2E "%s %s %.7f\n",$e,$f,$$WORD_TRANSLATION{$f}{$e}/$$TOTAL_FOREIGN{$f};
	    printf E2F "%s %s %.7f\n",$f,$e,$$WORD_TRANSLATION{$f}{$e}/$$TOTAL_ENGLISH{$e};
	}
    }
    close(E2F);
    close(F2E);
}

sub load_word_translation {
    my %WORD_TRANSLATION = ();
    open(WT,"$___LEXICAL_DIR/lex.n2f");
    while(<WT>) {
	chop;
	my($f,$e,$p) = split;
	$WORD_TRANSLATION{$f}{$e} = $p;
    }
    close(WT);
    return \%WORD_TRANSLATION;
}

### (5) PHRASE EXTRACTION

sub extract_phrase {
    print STDERR "(5) extract phrases @ ".`date`;
    print STDERR "$PHRASE_EXTRACT $___MODEL_DIR/aligned.$___E $___MODEL_DIR/aligned.$___F $___MODEL_DIR/aligned.$___ALIGNMENT $___EXTRACT_FILE $___MAX_PHRASE_LENGTH orientation\n";
    `$PHRASE_EXTRACT $___MODEL_DIR/aligned.$___E $___MODEL_DIR/aligned.$___F $___MODEL_DIR/aligned.$___ALIGNMENT $___EXTRACT_FILE $___MAX_PHRASE_LENGTH orientation`;
    `cat $___EXTRACT_FILE.o.part* > $___EXTRACT_FILE.o`;
    `gzip $___EXTRACT_FILE.o`;
    `rm -f $___EXTRACT_FILE.o.part*` unless $_DEBUG;
    `cat $___EXTRACT_FILE.part* > $___EXTRACT_FILE`;
    `rm -f $___EXTRACT_FILE.part*` unless $_DEBUG;
    `cat $___EXTRACT_FILE.inv.part* > $___EXTRACT_FILE.inv`;
    `rm -f $___EXTRACT_FILE.inv.part*` unless $_DEBUG;
}

### (6) PHRASE SCORING

sub score_phrase {
    print STDERR "(6) score phrases @ ".`date`;
    `gunzip $___EXTRACT_FILE.gz` if -e "$___EXTRACT_FILE.gz";
    `gunzip $___EXTRACT_FILE.inv.gz` if -e "$___EXTRACT_FILE.inv.gz";
    print STDERR "(6.1) sorting @ ".`date`;
    print "sort -T $___MODEL_DIR $___EXTRACT_FILE > $___EXTRACT_FILE.sorted\n";
    `sort -T $___MODEL_DIR $___EXTRACT_FILE > $___EXTRACT_FILE.sorted`;
    `gzip $___EXTRACT_FILE`;
    print STDERR "(6.2) sorting inv @ ".`date`;
    print "sort -T $___MODEL_DIR $___EXTRACT_FILE.inv > $___EXTRACT_FILE.inv.sorted\n";
    `sort -T $___MODEL_DIR $___EXTRACT_FILE.inv > $___EXTRACT_FILE.inv.sorted`;
    `gzip $___EXTRACT_FILE.inv`;

    for my $direction ("f2n","n2f") {
	print STDERR "(6.3) creating table half $___MODEL_DIR/phrase-table-half.$direction @ ".`date`;
	my $extract = "$___EXTRACT_FILE.sorted";
	$extract = "$___EXTRACT_FILE.inv.sorted" if $direction eq "n2f";
	my $inverse = "";
	$inverse = " inverse" if $direction eq "n2f";
	my $part_count = &split_extract($extract);
	for(my $i=0;$i<$part_count;$i++) {
	    my $part = sprintf("%04d",$i);
	    print "$PHRASE_SCORE $extract.part$part $___LEXICAL_DIR/lex.$direction $___MODEL_DIR/phrase-table-half.$direction.part$part $inverse\n";
	    `$PHRASE_SCORE $extract.part$part $___LEXICAL_DIR/lex.$direction $___MODEL_DIR/phrase-table-half.$direction.part$part $inverse`;
	    `rm $extract.part$part` unless $_DEBUG;
	}
	`cat $___MODEL_DIR/phrase-table-half.$direction.part* >$___MODEL_DIR/phrase-table-half.$direction`;
    }
    print STDERR "(6.4) sorting inverse n2f table@ ".`date`;
    print "sort -T $___MODEL_DIR $___MODEL_DIR/phrase-table-half.n2f > $___MODEL_DIR/phrase-table-half.n2f inverse.sorted\n";
    `sort -T $___MODEL_DIR $___MODEL_DIR/phrase-table-half.n2f > $___MODEL_DIR/phrase-table-half.n2f.sorted`;
    print STDERR "(6.5) consolidating the two halves @ ".`date`;
    open(F2N,"$___MODEL_DIR/phrase-table-half.f2n");
    open(N2F,"$___MODEL_DIR/phrase-table-half.n2f.sorted");
    open(TABLE,">$___MODEL_DIR/phrase-table");
    my $i=0;
    while(my $f2n = <F2N>) {
	$i++;
	my $n2f = <N2F>;
	my ($english,$foreign,$p) = split(/ \|\|\| /,$n2f); chop($p);
	my ($english2,$foreign2,$p2) = split(/ \|\|\| /,$f2n); chop($p2);
	if ($english ne $english2 || $foreign ne $foreign2) {
	    print STDERR "mismatch line $i: ($english ne $english2 || $foreign ne $foreign2)\n";
	    next;
	}
	print TABLE "$english ||| $foreign ||| $p $p2 2.718\n";
    }
    close(N2F);
    close(F2N);
    `rm -f $___MODEL_DIR/phrase-table-half.*` unless $_DEBUG;
    `rm -f $___MODEL_DIR/extract*sorted*` unless $_DEBUG;
    `gzip $___MODEL_DIR/phrase-table`;
}

sub split_extract {
    my ($file) = @_;
    my $i=0;
    my $part = 1;
    my $split_when_possible = 0;
    my ($first,$dummy);
    open(PART,sprintf(">%s.part%04d",$file,0));
    open(EXTRACT,$file);
    while(<EXTRACT>) {
	if ($i>0 && $i % 10000000 == 0) {
	    $split_when_possible = 1;
	    ($first,$dummy) = split(/ \|\|\| /);
	}
	elsif ($split_when_possible) {
	    my ($f,$dummy) = split(/ \|\|\| /);
	    if ($f ne $first) {
		close(PART) if $i;
		open(PART,sprintf(">%s.part%04d",$file,$part));
		$split_when_possible = 0;
		$part++;
	    }
	}
	print PART $_;
	$i++;
    }
    close(EXTRACT);
    return $part;
}

### (7) LEARN REORDERING MODEL

sub get_reordering {
    print STDERR "(7) learn reordering model @ ".`date`;
    print STDERR "(7.1) sorting extract.o @ ".`date`;
    `gunzip $___EXTRACT_FILE.o.gz` if -e "$___EXTRACT_FILE.o.gz";
    print "sort -T $___MODEL_DIR $___EXTRACT_FILE.o > $___EXTRACT_FILE.o.sorted\n";
    `sort -T $___MODEL_DIR $___EXTRACT_FILE.o > $___EXTRACT_FILE.o.sorted`;
    `gzip $___EXTRACT_FILE.o`;

    my $smooth = $___REORDERING_SMOOTH;
    my @REORDERING_SMOOTH_PREVIOUS = ($smooth,$smooth,$smooth);
    my @REORDERING_SMOOTH_FOLLOWING = ($smooth,$smooth,$smooth);

    my (%SMOOTH_PREVIOUS,%SMOOTH_FOLLOWING);
    if ($smooth =~ /(.+)u$/) {
	$smooth = $1;
	my $smooth_total = 0; 
	open(O,"$___EXTRACT_FILE.o.sorted");
	while(<O>) {
	    chomp;
	    my ($f,$e,$o) = split(/ \|\|\| /);
	    my ($o_previous,$o_following) = split(/ /,$o);
	    $SMOOTH_PREVIOUS{$o_previous}++;
	    $SMOOTH_FOLLOWING{$o_following}++;
	    $smooth_total++;
	}
	close(O);
	@REORDERING_SMOOTH_PREVIOUS = ($smooth*($SMOOTH_PREVIOUS{"mono"}+0.1)/$smooth_total,
				       $smooth*($SMOOTH_PREVIOUS{"swap"}+0.1)/$smooth_total,
				       $smooth*($SMOOTH_PREVIOUS{"other"}+0.1)/$smooth_total);
	@REORDERING_SMOOTH_FOLLOWING = ($smooth*($SMOOTH_FOLLOWING{"mono"}+0.1)/$smooth_total,
					$smooth*($SMOOTH_FOLLOWING{"swap"}+0.1)/$smooth_total,
					$smooth*($SMOOTH_FOLLOWING{"other"}+0.1)/$smooth_total);
	printf "$smooth*($SMOOTH_FOLLOWING{mono}+0.1)/$smooth_total,
					$smooth*($SMOOTH_FOLLOWING{swap}+0.1)/$smooth_total,
					$smooth*($SMOOTH_FOLLOWING{other}+0.1)/$smooth_total\n";
	printf "smoothed following to %f,%f,%f\n",@REORDERING_SMOOTH_FOLLOWING;
    }
    
    ($mono_previous_f,$swap_previous_f,$other_previous_f) = @REORDERING_SMOOTH_PREVIOUS;
    ($mono_previous_fe,$swap_previous_fe,$other_previous_fe) = @REORDERING_SMOOTH_PREVIOUS;
    ($mono_following_f,$swap_following_f,$other_following_f) = @REORDERING_SMOOTH_FOLLOWING;
    ($mono_following_fe,$swap_following_fe,$other_following_fe) = @REORDERING_SMOOTH_FOLLOWING;

    print STDERR "(7.2) building tables @ ".`date`;
    open(O,"$___EXTRACT_FILE.o.sorted");
    open(OF,  "|gzip >$___MODEL_DIR/orientation-table.f.$___REORDERING_SMOOTH.gz") 
	if defined($REORDERING_MODEL{"orientation-f"});
    open(OFE, "|gzip >$___MODEL_DIR/orientation-table.fe.$___REORDERING_SMOOTH.gz") 
	if defined($REORDERING_MODEL{"orientation-fe"});
    open(OBF, "|gzip >$___MODEL_DIR/orientation-table.bi.f.$___REORDERING_SMOOTH.gz") 
	if defined($REORDERING_MODEL{"orientation-bidirectional-f"});
    open(OBFE,"|gzip >$___MODEL_DIR/orientation-table.bi.fe.$___REORDERING_SMOOTH.gz") 
	if defined($REORDERING_MODEL{"orientation-bidirectional-fe"});
    open(MF,  "|gzip >$___MODEL_DIR/monotonicity-table.f.$___REORDERING_SMOOTH.gz") 
	if defined($REORDERING_MODEL{"monotonicity-f"});
    open(MFE, "|gzip >$___MODEL_DIR/monotonicity-table.fe.$___REORDERING_SMOOTH.gz") 
	if defined($REORDERING_MODEL{"monotonicity-fe"});
    open(MBF, "|gzip >$___MODEL_DIR/monotonicity-table.bi.f.$___REORDERING_SMOOTH.gz") 
	if defined($REORDERING_MODEL{"monotonicity-bidirectional-f"});
    open(MBFE,"|gzip >$___MODEL_DIR/monotonicity-table.bi.fe.$___REORDERING_SMOOTH.gz") 
	if defined($REORDERING_MODEL{"monotonicity-bidirectional-fe"});

    my $first = 1;
    while(<O>) {
	chomp;
	my ($f,$e,$o) = split(/ \|\|\| /);
	my ($o_previous,$o_following) = split(/ /,$o);
	
	# store counts if new f,e
	if ($first) {
	    $f_current = $f;
	    $e_current = $e;
	    $first = 0;
	}
	elsif ($f ne $f_current || $e ne $e_current) {
	    
	    if (defined($REORDERING_MODEL{"fe"})) {
		# compute probs, store them
		&store_reordering_fe();
		
		# reset counters
		($mono_previous_fe,$swap_previous_fe,$other_previous_fe) = @REORDERING_SMOOTH_PREVIOUS;
		($mono_following_fe,$swap_following_fe,$other_following_fe) = @REORDERING_SMOOTH_FOLLOWING;
	    }

	    # store counts if new f
	    if ($f ne $f_current && defined($REORDERING_MODEL{"f"})) {
		
		# compute probs, store them
		&store_reordering_f();
		
		# reset counters
		($mono_previous_f,$swap_previous_f,$other_previous_f) = @REORDERING_SMOOTH_PREVIOUS;
		($mono_following_f,$swap_following_f,$other_following_f) = @REORDERING_SMOOTH_FOLLOWING;
		
	    }
	    $f_current = $f;
	    $e_current = $e;
	}	
	# update counts
	if    ($o_previous eq 'mono') {  $mono_previous_f++;  $mono_previous_fe++; }
	elsif ($o_previous eq 'swap') {  $swap_previous_f++;  $swap_previous_fe++; }
	elsif ($o_previous eq 'other'){ $other_previous_f++; $other_previous_fe++; }
	else { print STDERR "buggy line (o_previous:$o_previous): $_\n"; }
	
	if    ($o_following eq 'mono') {  $mono_following_f++;  $mono_following_fe++; }
	elsif ($o_following eq 'swap') {  $swap_following_f++;  $swap_following_fe++; }
	elsif ($o_following eq 'other'){ $other_following_f++; $other_following_fe++; }
	else { print STDERR "buggy line (o_following:$o_following): $_\n"; }

    }
    if (defined($REORDERING_MODEL{"f"})) {
	&store_reordering_f();
    }
    if (defined($REORDERING_MODEL{"fe"})) {
	&store_reordering_fe();
    }
    `rm $___EXTRACT_FILE.o.sorted` unless $_DEBUG;
}

sub store_reordering_f {
    my $total_previous_f = $mono_previous_f+$swap_previous_f+$other_previous_f;
    my $total_following_f = $mono_following_f+$swap_following_f+$other_following_f;
    if(defined($REORDERING_MODEL{"orientation-f"})) {
 	printf OF ("%s ||| %.5f %.5f %.5f\n",
		   $f_current, 
		   $mono_previous_f/$total_previous_f,
		   $swap_previous_f/$total_previous_f,
		   $other_previous_f/$total_previous_f);
    }
    if(defined($REORDERING_MODEL{"orientation-bidirectional-f"})) {
	printf OBF ("%s ||| %.5f %.5f %.5f %.5f %.5f %.5f\n",
		    $f_current, 
		    $mono_previous_f/$total_previous_f,
		    $swap_previous_f/$total_previous_f,
		    $other_previous_f/$total_previous_f,
		    $mono_following_f/$total_following_f,
		    $swap_following_f/$total_following_f,
		    $other_following_f/$total_following_f);
    }
    if(defined($REORDERING_MODEL{"monotonicity-f"})) {
	printf MF ("%s ||| %.5f %.5f\n",
		  $f_current, 
		   $mono_previous_f/$total_previous_f,
		   ($swap_previous_f+$other_previous_f)/$total_previous_f);
    }
    if(defined($REORDERING_MODEL{"monotonicity-bidirectional-f"})) {
	printf MBF ("%s ||| %.5f %.5f %.5f %.5f\n",
		    $f_current, 
		    $mono_previous_f/$total_previous_f,
		    ($swap_previous_f+$other_previous_f)/$total_previous_f,
		    $mono_following_f/$total_following_f,
		    ($swap_following_f+$other_following_f)/$total_following_f);
    }
}

sub store_reordering_fe {
    my $total_previous_fe = $mono_previous_fe+$swap_previous_fe+$other_previous_fe;
    my $total_following_fe = $mono_following_fe+$swap_following_fe+$other_following_fe;
    
    if(defined($REORDERING_MODEL{"orientation-fe"})) {
 	printf OFE ("%s ||| %s ||| %.5f %.5f %.5f\n",
		   $f_current, $e_current, 
		   $mono_previous_fe/$total_previous_fe,
		   $swap_previous_fe/$total_previous_fe,
		   $other_previous_fe/$total_previous_fe);
    }
    if(defined($REORDERING_MODEL{"orientation-bidirectional-fe"})) {
	printf OBFE ("%s ||| %s ||| %.5f %.5f %.5f %.5f %.5f %.5f\n",
		    $f_current, $e_current, 
		    $mono_previous_fe/$total_previous_fe,
		    $swap_previous_fe/$total_previous_fe,
		    $other_previous_fe/$total_previous_fe,
		    $mono_following_fe/$total_following_fe,
		    $swap_following_fe/$total_following_fe,
		    $other_following_fe/$total_following_fe);
    }
    if(defined($REORDERING_MODEL{"monotonicity-fe"})) {
	printf MFE ("%s ||| %s ||| %.5f %.5f\n",
		   $f_current, $e_current, 
		   $mono_previous_fe/$total_previous_fe,
		   ($swap_previous_fe+$other_previous_fe)/$total_previous_fe);
    }
    if(defined($REORDERING_MODEL{"monotonicity-bidirectional-fe"})) {
	printf MBFE ("%s ||| %s ||| %.5f %.5f %.5f %.5f\n",
		    $f_current, $e_current, 
		    $mono_previous_fe/$total_previous_fe,
		    ($swap_previous_fe+$other_previous_fe)/$total_previous_fe,
		    $mono_following_fe/$total_following_fe,
		    ($swap_following_fe+$other_following_fe)/$total_following_fe);
    }
}

### (8) CREATE CONFIGURATION FILE

sub create_ini {
    print STDERR "(8) create pharaoh.ini @ ".`date`;
    
    &full_path(\$___MODEL_DIR);
    &full_path(\$___VCB_E);
    &full_path(\$___VCB_F);
    open(INI,">$___MODEL_DIR/pharaoh.ini");
    print INI "###########################
### PHARAOH CONFIG FILE ###
###########################

# phrase table f, n, p(n|f)
[ttable-file]
$___MODEL_DIR/phrase-table

# language model
[lmodel-file]
$___LM

# limit on how many phrase translations e for each phrase f are loaded
# 0 = all elements loaded
[ttable-limit]
20\n\n";

  my $weight_d_count = 0;
  if ($___REORDERING ne "distance") {
    my $file = "# distortion (reordering) files\n[distortion-file]\n";
    my $type = "# distortion (reordering) type\n[distortion-type]\n";
    foreach my $r (keys %REORDERING_MODEL) {
        next if $r eq "fe" || $r eq "f";
	$type .= $r."\n";
	if ($r eq "distance") { $weight_d_count++; } 
	else {
	    $r =~ s/-bidirectional/.bi/;
	    $r =~ s/-f/.f/;
	    $r =~ s/orientation/orientation-table/;
	    $r =~ s/monotonicity/monotonicity-table/;
	    $file .= "$___MODEL_DIR/$r.$___REORDERING_SMOOTH\n";
	    
	    my $w;
	    if ($r =~ /orient/) { $w = 3; } else { $w = 1; }
	    if ($r =~ /bi/) { $w *= 2; }
	    $weight_d_count += $w;
	}
    }
    print INI $type."\n".$file."\n";
  }
  else {
    $weight_d_count = 1;
  }
  
  print INI "# distortion (reordering) weight\n[weight-d]\n";
  for(my $i=0;$i<$weight_d_count;$i++) { 
    print INI "".(0.5/(scalar keys %REORDERING_MODEL))."\n";
  }
  print INI "\n# language model weight
[weight-l]
0.5

# translation model weight (phrase translation, lexical weighting)
[weight-t]
0.2
0.2
0.2
0.2
0.2

# word penalty
[weight-w]
-1
";


  close(INI);
}

sub full_path {
    my ($PATH) = @_;
    return if $$PATH =~ /^\//;
    $$PATH = `pwd`."/".$$PATH;
    $$PATH =~ s/[\r\n]//g;
    $$PATH =~ s/\/\.\//\//g;
    $$PATH =~ s/\/+/\//g;
    my $sanity = 0;
    while($$PATH =~ /\/\.\.\// && $sanity++<10) {
	$$PATH =~ s/\/+/\//g;
	$$PATH =~ s/\/[^\/]+\/\.\.\//\//g;
    }
    $$PATH =~ s/\/[^\/]+\/\.\.$//;
    $$PATH =~ s/\/+$//;
}

