#!/usr/bin/perl -w

use strict;

# Filter a sentence-aligned parallel corpus by length.
#
# Usage: clean-corpus.perl CORPUS L1 L2 OUT MIN MAX
#
# Reads CORPUS.L1 and CORPUS.L2 in lockstep (line N of one file is paired
# with line N of the other). Each line has runs of whitespace collapsed to a
# single space and leading/trailing space removed. A pair is written to
# OUT.L1 / OUT.L2 unless:
#   - either side is empty after normalization,
#   - either side has more than MAX or fewer than MIN tokens, or
#   - one side has more than $MAX_LENGTH_RATIO times the tokens of the other.

# Maximum allowed token-count ratio between the two sides of a pair.
my $MAX_LENGTH_RATIO = 9;

die "usage: clean-corpus.perl CORPUS L1 L2 OUT MIN MAX\n" unless @ARGV >= 6;
my ($corpus, $lang1, $lang2, $out, $min, $max) = @ARGV;

print STDERR "clean-corpus.perl: processing $corpus.$lang1 & .$lang2 to $out, cutoff $min-$max\n";

# Three-argument open with lexical handles; fail loudly instead of silently
# producing empty output when a file cannot be opened.
open(my $in_f, '<', "$corpus.$lang1")
  or die "clean-corpus.perl: cannot read $corpus.$lang1: $!\n";
open(my $in_e, '<', "$corpus.$lang2")
  or die "clean-corpus.perl: cannot read $corpus.$lang2: $!\n";
open(my $out_f, '>', "$out.$lang1")
  or die "clean-corpus.perl: cannot write $out.$lang1: $!\n";
open(my $out_e, '>', "$out.$lang2")
  or die "clean-corpus.perl: cannot write $out.$lang2: $!\n";

# Strip the line terminator, collapse whitespace runs to one space, and trim
# a leading/trailing space. Returns the normalized line.
sub normalize_whitespace {
  my ($line) = @_;
  # chomp (not chop): a final line without a trailing newline must not lose
  # its last real character.
  chomp($line);
  $line =~ s/\s+/ /g;
  $line =~ s/^ //;
  $line =~ s/ $//;
  return $line;
}

my $line_no = 0;
while (defined(my $f = <$in_f>)) {
  my $e = <$in_e>;
  $line_no++;

  # The second file ran out first: nothing left to pair with, so stop
  # rather than operate on undef for every remaining line.
  unless (defined $e) {
    warn "clean-corpus.perl: $corpus.$lang2 ends at line $line_no, before $corpus.$lang1; remaining lines ignored\n";
    last;
  }

  $e = normalize_whitespace($e);
  $f = normalize_whitespace($f);
  next if $f eq '';
  next if $e eq '';

  # Both strings are non-empty here, so both counts are >= 1 and the ratio
  # divisions below cannot divide by zero.
  my $e_count = scalar(split(/ /, $e));
  my $f_count = scalar(split(/ /, $f));
  next if $e_count > $max;
  next if $f_count > $max;
  next if $e_count < $min;
  next if $f_count < $min;
  next if $e_count / $f_count > $MAX_LENGTH_RATIO;
  next if $f_count / $e_count > $MAX_LENGTH_RATIO;

  print {$out_f} $f."\n" or die "clean-corpus.perl: write to $out.$lang1 failed: $!\n";
  print {$out_e} $e."\n" or die "clean-corpus.perl: write to $out.$lang2 failed: $!\n";
}

# Report the opposite mismatch too: leftover lines in the second file.
my $leftover = <$in_e>;
warn "clean-corpus.perl: $corpus.$lang2 has more lines than $corpus.$lang1; extra lines ignored\n"
  if defined $leftover;

close($in_f);
close($in_e);
# Buffered write errors (e.g. disk full) only surface at close time.
close($out_f) or die "clean-corpus.perl: closing $out.$lang1 failed: $!\n";
close($out_e) or die "clean-corpus.perl: closing $out.$lang2 failed: $!\n";
