#!/usr/bin/perl

# Copyright (C) 2002 Rodrigo Araujo Real
     
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; see the file COPYING.  If not, write to
# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

# Thanks to:
# Otavio Salvador otavio at debian.org
# Pietro Abate Pietro.Abate at anu.edu.au

use HTTP::Request::Common;
use LWP::UserAgent;

# Shared HTTP client used for every request made by this script.  It is a
# package global because getbib() refers to it by name.
$Ua = LWP::UserAgent->new();
{
    # Identify as a Mozilla-compatible client, tagged with the LWP version.
    my $identity = "Mozilla (compatible: LWP $LWP::VERSION)";
    $Ua->agent($identity);
}


# getbib URL
#
# Fetches URL with the shared user agent $Ua and prints the text found
# between the line carrying the "<!--gbr2-->" marker and the next line
# carrying the "<!--kbr-->" marker, followed by two newlines.
#
# On the opening line, everything up to and including the last "<pre>" is
# dropped; on a collected line containing "</pre>", that tag and the rest of
# the line are dropped.  If the page has several marked sections, only the
# last one is printed (each new opening marker restarts the entry).
#
# If the request does not succeed, a warning is written to STDERR and
# nothing is printed.
#
# All working state is lexical, so the caller's $_ and the main script's
# variables are left untouched.
sub getbib {
    my $url = shift;

    my $res = $Ua->request(HTTP::Request->new(GET => $url));
    unless ($res->is_success) {
        my $target = defined $url ? $url : '(undefined URL)';
        warn "getbib: cannot fetch $target: " . $res->status_line . "\n";
        return;
    }

    my $in_bib = 0;     # true while between the opening and closing markers
    my $entry  = "";

    foreach my $line (split /\r?\n/, $res->content) {
        if ($in_bib) {
            if ($line =~ /<!--kbr-->/) {
                $in_bib = 0;
            }
            else {
                $entry .= "$line\n";
                # Cut the closing tag (and anything after it on that line).
                $entry =~ s/(.*)<\/pre>.*/$1/;
            }
        }
        elsif ($line =~ /<!--gbr2-->/) {
            $in_bib = 1;
            $entry  = "$line\n";
            # Keep only what follows the opening <pre> on the marker line.
            $entry =~ s/.*<pre>(.*)/$1/;
        }
    }

    print $entry;
    print "\n\n";
    return;
}


# Main program: submit the search expression given as the first command-line
# argument to Citeseer, then call getbib() on every document link found in
# the results section of the returned page.

# A query is mandatory; without one the search would be sent empty.
unless (defined $ARGV[0] && length $ARGV[0]) {
    print STDERR "Usage: $0 <expression>\n";
    exit(1);
}

my $search = $Ua->request(POST 'http://citeseer.ist.psu.edu/cis', [q => $ARGV[0], submit => "Search+Documents", cs => "1" , co => "Expected Citations", cm => "50" , cf => "Any" , ao => "Expected Citations", am => "20" , af => "Any"]);

# Do not try to scrape an error page.
unless ($search->is_success) {
    print STDERR "$0: search request failed: " . $search->status_line . "\n";
    exit(1);
}

print "------------> Search Expression: $ARGV[0] Results <------------\n";

# The results section starts at the first line containing "documents" and
# ends at a line containing "Try your query at:".
my $in_results = 0;
foreach my $result_line (split /\r?\n/, $search->content) {
    if ($in_results) {
        # Capture the link target directly; [^"]* keeps the match inside a
        # single href attribute even when a line holds several links.
        if ($result_line =~ /<!--RIS--><a href="([^"]*html)">/) {
            my $doc_url = $1;   # copy before any later regex resets $1
            getbib($doc_url);
        }
        elsif ($result_line =~ /Try your query at:/) {
            $in_results = 0;
        }
    }
    elsif ($result_line =~ /documents/) {
        $in_results = 1;
    }
}


exit(0);

__END__

=head1 NAME

search-citeseer

=head1 SYNOPSIS

search-citeseer <expression>

=head1 DESCRIPTION

This Perl script helps you search Citeseer
(http://citeseer.nj.nec.com/cs) for BibTeX entries. With it you can
perform a query without a browser, directly from the command line.

=head1 EXAMPLE

search-citeseer "operating systems"

=head1 CONTRIBUTORS

Mario Domenech Goulart, Otavio Salvador

=head1 SEE ALSO

Citeseer - Scientific Literature Digital Library
http://citeseer.nj.nec.com/cs

The Collection of Computer Science Bibliographies
http://liinwww.ira.uka.de/bibliography/index.html

Search-CCSB
http://www.inf.ufrgs.br/~rreal/search-ccsb.html

=head1 COPYRIGHT

Copyright 2002 Rodrigo Araujo Real.

=cut
