
package EPrints::Plugin::Import::EssPer;

=head1 NAME

EPrints::Plugin::Import::EssPer -- import metadata from ESSPER records

=head1 DESCRIPTION

This plugin fetches the metadata of records from ESSPER (http://www.biblio.liuc.it/scripts/essper/) and imports it into EPrints.

=head1 CONFIGURATION

Copy this file into perl_lib/EPrints/Plugin/Import and restart the web server. Requires HTML::Parse and HTML::FormatText.

=head1 COPYRIGHT AND LICENSE

(C) 2010 Michele Pinassi <michele.pinassi@unisi.it>
This module is free software under the same terms of Perl.

=cut
use strict;
use LWP::Simple;
use Encode::Encoder qw(encoder);
use HTML::Parse;
use HTML::FormatText;

our @ISA = qw/ EPrints::Plugin::Import /;

# Constructor: builds the plugin object through the parent class and then
# sets the name, visibility and produced types of this plugin.
sub new
{
	my( $class, %params ) = @_;

	my $self = $class->SUPER::new( %params );

	# Assign the three plugin settings in one go via a hash slice.
	@{$self}{qw( name visible produce )} = (
		"ESSPER IDs",
		"all",
		[ 'list/eprint', 'dataobj/eprint' ],
	);

	return $self;
}

# Reads ESSPER record identifiers from a filehandle, one per line, fetches
# each record page from the ESSPER web site and imports it as a data object.
#
# Each input line is either a numeric ESSPER record code or a record URL
# (http or https) carrying the code in its "codice" query parameter.
#
# Options read from %opts:
#   fh      - filehandle the identifiers are read from
#   dataset - passed on to epdata_to_dataobj() and to the returned list
#
# Returns an EPrints::List holding the ids of the objects created. Lines
# that cannot be processed are reported through $plugin->warning and skipped.
sub input_fh
{
	my( $plugin, %opts ) = @_;

	# LWP::Simple (loaded by this file) normally pulls this in already;
	# load it explicitly so this sub does not depend on that side effect.
	require LWP::UserAgent;

	my @ids;

	# One user agent is enough for the whole import run.
	my $browser = LWP::UserAgent->new;

	my $fh = $opts{fh};
	while( my $ids = <$fh> )
	{
		# Strip the line ending (including the CR of DOS-style files) and
		# surrounding blanks, so they can neither defeat the validation
		# below nor leak into the request URL.
		$ids =~ s/^\s+//;
		$ids =~ s/\s+$//;

		if( $ids eq "" )
		{
			$plugin->warning( "Sorry, cannot guess empty IDs !");
			next;
		}

		if( $ids =~ m{^https?://}i )
		{
			# Pull the record code out of the query string, wherever the
			# "codice" parameter appears in it.
			my( $code ) = $ids =~ m/[?&;]codice=([^&;#]*)/;
			if( !defined $code )
			{
				$plugin->warning( "Invalid ID: ".$ids );
				next;
			}
			$ids = $code;
		}

		# The code is spliced into a URL below, so accept digits only and
		# anchor the match at both ends.
		if( $ids !~ /\A\d+\z/ )
		{
			$plugin->warning( "Invalid ID: ".$ids );
			next;
		}

		my $url = "http://www.biblio.liuc.it/scripts/essper/ricerca.asp?tipo=scheda&codice=".$ids;

		my $response = $browser->get( $url );

		if( !$response->is_success )
		{
			$plugin->warning( "Error while fetching URL $url: ".$response->status_line );
			next;
		}

		# Render the HTML page as plain text (lines up to 256 columns wide)
		# and pass it through the Latin-1 and UTF-8 encoders.
		# NOTE(review): presumably the site serves Latin-1 pages - confirm.
		my $input_data = encoder(HTML::FormatText->new(leftmargin => 0, rightmargin => 256)->format(parse_html($response->content)))->latin1->utf8;

		my $epdata = $plugin->convert_input( $input_data );

		next unless( defined $epdata );

		# Record where the metadata came from.
		$epdata->{note} = "Imported from ESSPER (".$url.")";

		my $dataobj = $plugin->epdata_to_dataobj( $opts{dataset}, $epdata );
		if( defined $dataobj )
		{
			push @ids, $dataobj->get_id;
		}
	}

	return EPrints::List->new(
		dataset => $opts{dataset},
		session => $plugin->{session},
		ids => \@ids );
}

# Converts the plain-text rendering of an ESSPER record page into an
# epdata hash reference.
#
# $input_data is scanned line by line for "Label: value" pairs using the
# labels Autori, Titolo, Periodico, Anno, Volume, Fascicolo,
# Pagina iniziale, Pagina finale and SICI; every other line is ignored.
# Values are stored without surrounding whitespace and labels with an
# empty value are treated as absent.
#
# Returns a hash reference with type "article" plus whichever of
# creators_name, title, publication, issn, volume, issue, pagerange and
# date could be filled in, or undef when no labelled value was found.
# An "Anno" value that is not a four-digit year is reported through
# $plugin->warning and not stored.
sub convert_input 
{
	my ( $plugin, $input_data ) = @_;

	# Explicit undef (not a bare return) so callers always get one scalar.
	return undef unless defined $input_data;

	# Entry Type: always articles
	my $epdata = { type => "article" };
	my $fields = {};

	foreach my $line ( split( /\n/, $input_data ) )
	{
		# Non-greedy value capture, so trailing blanks (or a stray CR) do
		# not end up in the stored value.
		next unless $line =~ m/^\s*(Autori|Titolo|Periodico|Anno|Volume|Fascicolo|Pagina iniziale|Pagina finale|SICI):\s*(.*?)\s*$/;
		my( $label, $value ) = ( $1, $2 );
		next if $value eq "";
		$fields->{$label} = $value;
	}

	# Nothing recognisable on the page: let the caller skip this record
	# instead of creating an empty one.
	return undef unless %{$fields};

	# Authors come as a flat list: Cognome, Nome, Cognome, Nome ....
	if( exists $fields->{Autori} )
	{
		# Split on commas and drop the blanks around them.
		my @authors = split( /\s*,\s*/, $fields->{Autori} );

		# Walk the list in (family, given) pairs; "<" rather than "<=" so
		# no empty creator is appended after the last pair.
		for( my $i = 0; $i < @authors; $i += 2 )
		{
			my $name = { family => $authors[$i] };
			$name->{given} = $authors[$i+1] if defined $authors[$i+1];
			push @{ $epdata->{creators_name} }, $name;
		}
	}

	# title
	$epdata->{title} = $fields->{Titolo} if exists $fields->{Titolo};

	# journal
	$epdata->{publication} = $fields->{Periodico} if exists $fields->{Periodico};

	# extract ISSN from SICI
	if( exists $fields->{SICI} )
	{
		# SICI: 1127-5812(1997)10:1<117:UDSDS->2.0.ZU;2-Z
		# The last ISSN character is a check digit and may be "X".
		if( $fields->{SICI} =~ m/^(\d{4})-(\d{3}[\dXx])/ )
		{
			$epdata->{issn} = $1."-".uc( $2 );
		}
	}

	# volume
	$epdata->{volume} = $fields->{Volume} if exists $fields->{Volume};

	# issue
	$epdata->{issue} = $fields->{Fascicolo} if exists $fields->{Fascicolo};

	# pages: only set when both the first and the last page are known
	if( exists $fields->{"Pagina iniziale"} and exists $fields->{"Pagina finale"} )
	{
		$epdata->{pagerange} = $fields->{"Pagina iniziale"}."-".$fields->{"Pagina finale"};
	}

	# year
	if( exists $fields->{"Anno"} )
	{
		my $year = $fields->{"Anno"};
		if( $year =~ /^[0-9]{4}$/ )
		{
			$epdata->{date} = $year;
		}
		else
		{
			$plugin->warning( $plugin->phrase( "skip_year", year => $year ) );
		}
	}

	# abstract
	# NOTE(review): "Abstract" is not among the labels matched above, so
	# this never fires - confirm the label used on the ESSPER page before
	# adding it to the pattern.
	$epdata->{abstract} = $fields->{"Abstract"} if exists $fields->{"Abstract"};

	return $epdata;
}

1;

