#!/usr/bin/perl -I/root/zoozle/torrentbot/modules

my $VERION = "0.2";	# NOTE(review): looks like a typo of $VERSION; nothing in view reads it, name kept as-is

# Lower this process' scheduling priority before crawling.
# List-form system() executes the program directly (no shell involved) and
# the wait status is kept so a failure is no longer silently ignored.
# NOTE(review): nice values normally end at 19, so "20" is presumably
# clamped by renice - confirm.
my $ReniceStatus = system( 'renice', '20', $$ );

# Start with an empty terminal.
system( 'clear' );

# Reported only after the screen was cleared so the message stays visible.
warn "renice of pid $$ failed (wait status $ReniceStatus)\n" if $ReniceStatus != 0;

# load modules
use strict;
no strict "refs";
use Data::Dumper;
use Digest::MD5 qw( md5_hex );
use TorrentBot::HTTP::Get;
use TorrentBot::HTTP::Head;
use TorrentBot::LINK::ExtractorMem;	# rel nofollow links ingnored
use TorrentBot::HTML::HtmlParser;


# Initialise the helper objects (one shared instance each, used by the
# subs further down in this file).
my $HEAD		= TorrentBot::HTTP::Head->new();	# used by ScanningPage() via ->Head( $url )
my $GET			= TorrentBot::HTTP::Get->new();	# used by doScanning() and ScanningPage() via ->Get( $url )
my $HTML		= TorrentBot::HTML::HtmlParser->new();	# used by ScanningPage() via ->getPageTitle( \$content )
my $LINK		= TorrentBot::LINK::ExtractorMem->new();	# used by ScanningPage() via ->HtmlLinkExtractor( $config )


# Initialise the in-memory storage.
# $AlreadSeenLinks is used by ScanningPage() as a hash reference keyed by
# URL (the set of links that were already handled).
my $AlreadSeenLinks;
# NOTE(review): the hash %AlreadSeenLinks is a different variable than the
# scalar above and is not referenced by any code in view.
my %AlreadSeenLinks		= ();
# Filled by ScanningPage() with the single characters of the last HTTP
# status code.
my @StatusCode			= ();
my @HOSTNAME			= ();
# NOTE(review): the array @LinksToScan is distinct from the hash
# %LinksToScan declared further down and is not referenced in view.
my @LinksToScan			= ();

# The following scalars are declared but not referenced by code in view.
my $StatusCode;
my $HOSTNAME;
my $TorrentDB;
my $TempStorePath;
my $BadUrls;
my $StatusFile;
my $pid;

my $StayOnSameHost		= 1;	# not referenced by code in view
my $AlreadyScannedLinks	= 0;	# incremented by ScanningPage() for every page that passes the HEAD check
my $BadUrlCount			= 0;	# not referenced by code in view


# Flat file that doScanning() appends its "CATE=...#DESC=...#LINK=..." records to.
my $torrentflatfile		= "/root/zoozle/new-bots-fr/complete-emule/donkey1.txt";

# Seed table of listing pages to harvest: URL => value.
# Main() hands the value to doScanning() as its second argument; doScanning()
# currently does not use it.
#
# The original table listed ".../filma.php" twice ("0", annotated "1x54", and
# then "9"). In a hash the later pair silently wins, so only the effective
# entry ("9") is kept here; the resulting hash is unchanged.
my %LinksToScan = (
	# films, one page per initial letter
	"http://www.letoutedonkey.com/filma.php"	 => "9",
	"http://www.letoutedonkey.com/filmb.php"	 => "9",
	"http://www.letoutedonkey.com/filmc.php"	 => "9",
	"http://www.letoutedonkey.com/filmd.php"	 => "9",
	"http://www.letoutedonkey.com/filme.php"	 => "9",
	"http://www.letoutedonkey.com/filmf.php"	 => "9",
	"http://www.letoutedonkey.com/filmg.php"	 => "9",
	"http://www.letoutedonkey.com/filmh.php"	 => "9",
	"http://www.letoutedonkey.com/filmi.php"	 => "9",
	"http://www.letoutedonkey.com/filmj.php"	 => "9",
	"http://www.letoutedonkey.com/filmk.php"	 => "9",
	"http://www.letoutedonkey.com/filml.php"	 => "9",
	"http://www.letoutedonkey.com/filmm.php"	 => "9",
	"http://www.letoutedonkey.com/filmn.php"	 => "9",
	"http://www.letoutedonkey.com/filmo.php"	 => "9",
	"http://www.letoutedonkey.com/filmp.php"	 => "9",
	"http://www.letoutedonkey.com/filmq.php"	 => "9",
	"http://www.letoutedonkey.com/filmr.php"	 => "9",
	"http://www.letoutedonkey.com/films.php"	 => "9",
	"http://www.letoutedonkey.com/filmt.php"	 => "9",
	"http://www.letoutedonkey.com/filmu.php"	 => "9",
	"http://www.letoutedonkey.com/filmv.php"	 => "9",
	"http://www.letoutedonkey.com/filmw.php"	 => "9",
	"http://www.letoutedonkey.com/filmx.php"	 => "9",
	"http://www.letoutedonkey.com/filmy.php"	 => "9",
	"http://www.letoutedonkey.com/filmz.php"	 => "9",
	"http://www.letoutedonkey.com/dessinanime.php"	 => "9",

	# other categories
	"http://www.letoutedonkey.com/documentaire.php"	 => "",
	"http://www.letoutedonkey.com/serietv.php"	 => "",

	# music, one page per initial letter
	"http://www.letoutedonkey.com/musiqueal.php"	 => "",
	"http://www.letoutedonkey.com/musiquea.php"	 => "",
	"http://www.letoutedonkey.com/musiqueb.php"	 => "",
	"http://www.letoutedonkey.com/musiquec.php"	 => "",
	"http://www.letoutedonkey.com/musiqued.php"	 => "",
	"http://www.letoutedonkey.com/musiquee.php"	 => "",
	"http://www.letoutedonkey.com/musiquef.php"	 => "",
	"http://www.letoutedonkey.com/musiqueg.php"	 => "",
	"http://www.letoutedonkey.com/musiqueh.php"	 => "",
	"http://www.letoutedonkey.com/musiquei.php"	 => "",
	"http://www.letoutedonkey.com/musiquej.php"	 => "",
	"http://www.letoutedonkey.com/musiquek.php"	 => "",
	"http://www.letoutedonkey.com/musiquel.php"	 => "",
	"http://www.letoutedonkey.com/musiquem.php"	 => "",
	"http://www.letoutedonkey.com/musiquen.php"	 => "",
	"http://www.letoutedonkey.com/musiqueo.php"	 => "",
	"http://www.letoutedonkey.com/musiquep.php"	 => "",
	"http://www.letoutedonkey.com/musiqueq.php"	 => "",
	"http://www.letoutedonkey.com/musiquer.php"	 => "",
	"http://www.letoutedonkey.com/musiques.php"	 => "",
	"http://www.letoutedonkey.com/musiquet.php"	 => "",
	"http://www.letoutedonkey.com/musiqueu.php"	 => "",
	"http://www.letoutedonkey.com/musiquev.php"	 => "",
	# NOTE(review): the next four break the "musique<letter>.php" pattern
	# (no "e") - verify against the site before changing them.
	"http://www.letoutedonkey.com/musiquw.php"	 => "",
	"http://www.letoutedonkey.com/musiqux.php"	 => "",
	"http://www.letoutedonkey.com/musiquy.php"	 => "",
	"http://www.letoutedonkey.com/musiquz.php"	 => "",

	# albums, concerts, games, software
	"http://www.letoutedonkey.com/albumfr.php"	 => "",
	"http://www.letoutedonkey.com/albumvo.php"	 => "",
	"http://www.letoutedonkey.com/concert.php"	 => "",
	"http://www.letoutedonkey.com/jeuxpc.php"	 => "",
	"http://www.letoutedonkey.com/logiciel.php"	 => "",
);


Main();

# Main() - entry point of the crawler.
#
# Passes every seed URL of %LinksToScan (together with its value) to
# doScanning(), one after the other in the same process (no forking).
# The URLs are visited in sorted order so that runs are reproducible.
#
# Returns: 1, always.
sub Main {

	foreach my $SeedUrl ( sort keys %LinksToScan ) {
		doScanning( $SeedUrl, $LinksToScan{$SeedUrl} );
	}

	return 1;

} # sub Main


# doScanning( $UrltoScan, $GotToPage ) - harvest the titles of one listing page.
#
# Fetches $UrltoScan through $GET and appends one record per title found to
# $torrentflatfile, in the form
#     CATE=fr#DESC=<title>#LINK=<listing url>
#
# Parameters:
#   $UrltoScan - URL of the listing page.
#   $GotToPage - accepted for interface compatibility; not used.
#
# Returns: 1 on success (also when the page yields no title), 0 when the
# output file cannot be opened or closed (a warning is printed).
#
# This sub used to be written inside the body of Main(). Named subs are
# package-wide in Perl no matter where they are written, so it is defined at
# file level here, where it really lives.
sub doScanning {

	my $UrltoScan	= shift;
	my $GotToPage	= shift;	# unused, see above

	my $GetHashResultRef	= $GET->Get( $UrltoScan );
	my $ResponseContent		= $GetHashResultRef ? $GetHashResultRef->{ 'CNT' } : undef;

	# append-only lexical handle; it is never read from
	my $OutFh;
	unless ( open( $OutFh, '>>', $torrentflatfile ) ) {
		warn "cannot append to '$torrentflatfile': $!\n";
		return 0;
	}

	foreach my $Title ( _extractTitles( $ResponseContent ) ) {
		print {$OutFh} "CATE=fr#DESC=$Title#LINK=$UrltoScan\n";
	}

	# buffered write errors only show up at close time
	unless ( close( $OutFh ) ) {
		warn "cannot close '$torrentflatfile': $!\n";
		return 0;
	}

	return 1;

} # sub doScanning


# _extractTitles( $Html ) - private helper of doScanning().
#
# Cuts $Html at every "Titre:" label and returns the list of titles that
# follow such a label, with the inner colour markup removed and surrounding
# whitespace trimmed. Candidates longer than 100 characters (measured before
# trimming) are dropped, as are empty ones. Returns the empty list for
# undefined or empty input.
sub _extractTitles {

	my $Html = shift;
	return () unless defined $Html && length $Html;

	my $TitleLabel	= qr{<font color="#c00000">Titre:</font><font color="#ffffff">}i;
	my $TitleEnd	= qr{</font><font color="#ffffff"><br>}i;

	my @Chunks = split( $TitleLabel, $Html );

	# The first chunk is whatever stands in front of the first label (or the
	# whole page when there is no label at all) - it can never be a title.
	shift @Chunks;

	my @Titles = ();
	foreach my $Chunk ( @Chunks ) {
		my ( $Title ) = split( $TitleEnd, $Chunk );
		next unless defined $Title;

		$Title =~ s{</font><font color="#000080">}{}ig;
		next if length( $Title ) > 100;

		$Title =~ s/^\s+//;
		$Title =~ s/\s+$//;
		next if $Title eq '';

		push @Titles, $Title;
	}

	return @Titles;

} # sub _extractTitles

# TODO: check tomorrow whether isohunt still refuses the connection
# print &ScanningPage("http://www.zoozle.net/emule-bittorrent-download/gothic,torrent,,0.html",,);
# exit;


# ScanningPage( $UrltoScan [, $ParrentUrl [, $LinkText ]] )
#
# Checks one URL with a HEAD request and, when that succeeds, downloads the
# page and passes it to the link extractor.
#
# Parameters:
#   $UrltoScan  - URL to scan (required).
#   $ParrentUrl - URL of the referring page, defaults to "".
#   $LinkText   - text of the link that led here, defaults to "".
#
# Returns:
#   0           when no (or a false) URL was given,
#   nothing     (bare return) when the URL was already handled, contains
#               "forum", or fails the HEAD check,
#   otherwise   whatever $LINK->HtmlLinkExtractor() returns.
#
# Side effects: records the URL in $AlreadSeenLinks, refills @StatusCode,
# increments $AlreadyScannedLinks and prints a progress/diagnostic line.
#
# The former empty prototype "()" was dropped: it contradicted the three
# parameters and made every call without a leading "&" a compile error.
sub ScanningPage {

	my $UrltoScan			= shift || return 0;
	my $ParrentUrl			= shift || "";
	my $LinkText			= shift || "";

	# do not scan a link twice ...
	return if exists $AlreadSeenLinks->{$UrltoScan};
	# ... and never scan forum links (plain boolean match; a /g flag here
	# would only add hidden pos() state)
	return if $UrltoScan =~ /forum/i;

	# Mark the link as handled right away: several early returns follow and
	# without this mark the same link could be spidered many times.
	$AlreadSeenLinks->{$UrltoScan} = 0;

	my $HeadHashResultRef	= $HEAD->Head( $UrltoScan );
	my $HeadStatusCode		= $HeadHashResultRef->{ 'STA' };
	my $HttpCode			= $HeadHashResultRef->{ 'COD' };

	# Split the status code into single characters; only the first one (the
	# status class) is looked at. @StatusCode is file-level state.
	@StatusCode				= split( //, defined $HttpCode ? $HttpCode : '' );
	my $StatusClass			= defined $StatusCode[0] ? $StatusCode[0] : '';

	# Give up unless the status class is 2xx and the HEAD helper flagged the
	# request as successful ('STA' numerically equal to 1).
	if ( $StatusClass ne '2' ) {
		print "something is wrong status '$StatusClass'\n";
		return;
	}
	unless ( defined $HeadStatusCode && $HeadStatusCode == 1 ) {
		my $ShownHeadStatus = defined $HeadStatusCode ? $HeadStatusCode : '';
		print "something is wrong head '$ShownHeadStatus'\n";
		return;
	}

	$AlreadyScannedLinks++;

	my $GetHashResultRef	= $GET->Get( $UrltoScan );
	my $ResponseContent		= $GetHashResultRef->{ 'CNT' };
	my $ResponseObj			= $GetHashResultRef->{ 'OBJ' };
	my $PageTitle			= $HTML->getPageTitle( \$ResponseContent );

	print "$PageTitle :: $UrltoScan\n";

	# Configuration handed to the link extractor. The meaning of CDP, SUD
	# and SDT is defined by TorrentBot::LINK::ExtractorMem, not in this file.
	my $ScanLinkConfig = {
		"OBJ"	=> $ResponseObj,
		"CON"	=> $ResponseContent,
		"CDP"	=> 0,
		"SUD"	=> 1,
		"SDT"	=> 0,	# 0=CurrentDept | 1=dirdept
		"URL"	=> $UrltoScan,
		"PURL"	=> $ParrentUrl,
		"TXT"	=> $LinkText,
	};

	return $LINK->HtmlLinkExtractor( $ScanLinkConfig );

} # sub ScanningPage