package Clair::MEAD::DocsentConverter;

use strict;
use Clair::Document;
use Clair::Config;
use lib "$MEAD_HOME/lib";
use MEAD::MEAD_ADDONS_UTIL;

# Constructor.
#
# Named parameters:
#   dest - output directory for the cluster (required; dies when missing)
#   lang - language code written to the cluster header (default "ENG")
#   name - cluster name (defaults to the value of dest)
#
# Any caller-supplied "docs" entry is discarded: the converter always
# starts out with an empty document list.
sub new {
    my ($class, %args) = @_;

    die "Expected directory" unless defined $args{dest};

    $args{lang} = "ENG"       unless defined $args{lang};
    $args{name} = $args{dest} unless defined $args{name};

    # Fresh, empty list of documents to convert.
    $args{docs} = [];

    return bless \%args, $class;
}

# Appends every document held by the given cluster to the converter.
#
# $cluster must respond to documents() with a hash reference whose values
# are the documents; the keys are ignored. The document list is replaced
# by a new array rather than modified in place.
sub add_cluster {
    my ($self, $cluster) = @_;

    my $members = $cluster->documents();

    $self->{docs} = [ @{ $self->{docs} }, values %$members ];
}

# Adds every regular file found directly inside $dir to the converter.
#
# Subdirectories (including "." and "..") are skipped; the directory is not
# walked recursively. Files whose name ends in ".html" are created as "html"
# documents, everything else as "text". The document id is the bare file
# name.
#
# Dies if $dir is not a directory or cannot be opened for reading.
sub add_directory {
    my $self = shift;
    my $dir = shift;

    die "Expected directory: $!" unless (-d $dir);

    # Lexical directory handle, released with closedir: close() does not
    # work on directory handles and would leave the handle open.
    opendir(my $dh, $dir) or die "Could not read directory: $!";
    my @files = readdir($dh);
    closedir($dh);

    my @docs = @{ $self->{docs} };

    foreach my $file (@files) {
        next unless -f "$dir/$file";

        my $type = ($file =~ /\.html$/) ? "html" : "text";

        # readdir returns bare entry names (no path separators), so the
        # entry name itself is the id.
        my $doc = Clair::Document->new(
            file => "$dir/$file",
            type => $type,
            id => $file
        );
        push @docs, $doc;
    }

    $self->{docs} = \@docs;
}

# Appends one or more already-constructed documents to the converter.
#
# All arguments after the invocant are taken as documents, in order. The
# document list is replaced by a new array rather than modified in place.
sub add_document {
    my ($self, @incoming) = @_;

    $self->{docs} = [ @{ $self->{docs} }, @incoming ];
}

# Adds one or more files, given by path, to the converter.
#
# A path ending in ".html" is created as an "html" document, anything else
# as "text". The document id is the last path component (the text after
# the final "/"). When the path has no such component (for example it ends
# in "/"), the whole path is used, so the id is never undefined and never
# a stale capture left over from an earlier regex match.
sub add_file {
    my $self = shift;
    my @files = @_;

    my @docs = @{ $self->{docs} };

    foreach my $file (@files) {
        my $type = ($file =~ /\.html$/) ? "html" : "text";

        # List-context match: $id stays undef when the pattern fails,
        # instead of silently reusing an old $1.
        my ($id) = $file =~ /([^\/]+)$/;
        $id = $file unless defined $id;

        my $doc = Clair::Document->new(file => $file,
                                       type => $type,
                                       id => $id);
        push @docs, $doc;
    }

    $self->{docs} = \@docs;
}

# Writes the accumulated documents out as a cluster under $self->{dest}.
#
# Files produced:
#   <dest>/docsent/<id>.docsent - one <S> element per sentence of the document
#   <dest>/orig/<id>            - the document text as returned by get_text()
#   <dest>/<name>.cluster       - index listing the id of every document
#
# Document ids are normalised with _clean_ids() before anything is written.
# The header/tail/sanitize helpers are not defined in this file; presumably
# they are exported by MEAD::MEAD_ADDONS_UTIL (confirm against that module).
#
# Dies if a directory cannot be created, or if any output file cannot be
# opened or closed.
sub convert {
    my $self = shift;
    $self->_clean_ids();
    my @docs = @{ $self->{docs} };

    my $docsent_dir = "$self->{dest}/docsent";
    my $orig_dir = "$self->{dest}/orig";

    # The parent must be created before its two subdirectories.
    foreach my $dir ($self->{dest}, $docsent_dir, $orig_dir) {
        next if -d $dir;
        mkdir($dir) or die "Could not create directory $dir: $!";
    }

    # Create the docsent files and the originals
    foreach my $doc (@docs) {
        my $id = $doc->get_id();

        my $out_file = "$docsent_dir/$id.docsent";

        $doc->strip_html();

        # Three-argument open: the file name is never parsed for a mode.
        open(my $docsent_fh, '>', $out_file)
            or die "Could not write to $out_file: $!";
        print {$docsent_fh} get_docsent_header($id);

        my $i = 1;
        foreach my $sentence ($doc->split_into_sentences()) {
            $sentence = sanitize($sentence);
            print {$docsent_fh} "<S PAR=\"1\" RSNT=\"1\" SNO=\"$i\">$sentence</S>\n";
            $i++;
        }

        print {$docsent_fh} get_docsent_tail();
        # Buffered write errors only surface when the handle is closed.
        close($docsent_fh) or die "Could not close $out_file: $!";

        my $orig_file = "$orig_dir/$id";
        open(my $orig_fh, '>', $orig_file)
            or die "Could not write to $orig_file: $!";
        print {$orig_fh} $doc->get_text();
        close($orig_fh) or die "Could not close $orig_file: $!";
    }

    # Create the cluster file
    my $outname = $self->{name};
    $outname =~ s/.*?\///g;    # keep only the last path component
    my $cluster_file = "$self->{dest}/$outname.cluster";
    open(my $cluster_fh, '>', $cluster_file)
        or die "Could not write to $cluster_file: $!";
    print {$cluster_fh} get_cluster_header($self->{lang});

    foreach my $doc (@docs) {
        my $id = $doc->get_id();
        $id =~ s/.*\///g;
        print {$cluster_fh} "<D DID=\"$id\"/>\n";
    }

    print {$cluster_fh} get_cluster_tail();
    close($cluster_fh) or die "Could not close $cluster_file: $!";
}

# Normalises the id of every stored document in place: removes everything
# up to and including the last "/" and deletes every occurrence of
# ".docsent". The cleaned id is written back through set_id().
sub _clean_ids {
    my ($self) = @_;

    for my $document (@{ $self->{docs} }) {
        my $clean = $document->get_id();
        $clean =~ s/.*?\///g;
        $clean =~ s/\.docsent//g;
        $document->set_id(id => $clean);
    }
}

=head1 NAME

Clair::MEAD::DocsentConverter - Document => Mead Cluster converter

=head1 VERSION

Version 0.01

=cut

# Module version; keep in sync with the "Version" line in the POD above.
our $VERSION = '0.01';

=head1 SYNOPSIS

This module is used to take documents from a directory, Clair::Document
objects, or Clair::Cluster objects and create a Mead-style cluster directory.
    
    use Clair::MEAD::DocsentConverter;
    use Clair::Document;
    use Clair::Cluster;
    my $doc = Clair::Document->new( ... );
    my $cluster  = Clair::Cluster->new( ... );
    my $dir = "some/path";
    my $file = "file.txt";
    ...
    my $c = Clair::MEAD::DocsentConverter->new( 
        dest => "outputdir", 
        name => "mycluster" );
    $c->add_document($doc);
    $c->add_cluster($cluster);
    $c->add_directory($dir);
    $c->add_file($file);
    $c->convert();

=head1 METHODS

=cut


=head2 new

    $c = Clair::MEAD::DocsentConverter->new(
        dest => "output/path",
        lang => "ENG",
        name => "mycluster"
    );

Creates a new converter object. C<dest> is required; C<lang> defaults to
"ENG" and C<name> defaults to the value of C<dest>.

=cut

=head2 add_file

Adds the given file (or list of files) to the cluster.

=cut

=head2 add_document

Adds the given Clair::Document to the cluster.

=cut

=head2 add_directory

Adds all of the files in the given directory to the cluster.

=cut

=head2 add_cluster

Adds all of the documents in the given Clair::Cluster to the cluster.

=cut

=head2 convert

Converts the previously added files to a Mead cluster in the directory
set in the constructor.

=cut

1;
