#!/usr/bin/perl -I/root/zoozle/torrentbot/modules

# Script version string.
# NOTE(review): the identifier is spelled "VERION" and is not referenced
# anywhere else in the visible source.
my $VERION = "0.2";
# Helper library loaded at runtime. AddContentToSqlTORRENT(), which is called
# further down in this section, is not defined in this file - presumably it is provided
# by this library (TODO confirm).
require "/root/zoozle/new-bots-X/functions.pl";

# Ask the external "renice" command to set the niceness of this process
# ($$ is our own PID) to 20, then clear the terminal.
system("renice 20 $$");
system("clear");

# Load modules. Strict mode applies from here on, except that symbolic
# references stay allowed ("no strict refs").
use strict;
no strict "refs";
use IO::Handle;
use TorrentBot::HTTP::Get;
use TorrentBot::HTTP::Head;
use TorrentBot::LINK::ExtractorMem;
use Data::Dumper;

# Initialize the helper objects used by ScanningPage():
#   $HEAD - issues the HEAD request, $GET - fetches the page,
#   $LINK - extracts the links from the fetched page.
my $HEAD		= TorrentBot::HTTP::Head->new();
my $GET			= TorrentBot::HTTP::Get->new();
my $LINK		= TorrentBot::LINK::ExtractorMem->new();


# Initialize the shared in-memory state.
# $AlreadSeenLinks is used as a hash reference (autovivified on first use) by
# both doScanning() and ScanningPage(): its keys are the URLs and link texts
# that have already been handled. The hash %AlreadSeenLinks is a separate
# variable and is not referenced by any active code in the visible source.
my $AlreadSeenLinks;
my %AlreadSeenLinks		= ();
# Filled by ScanningPage() with the individual characters of the HEAD result's
# 'COD' field.
my @StatusCode			= ();
my @HOSTNAME			= ();
my @LinksToScan			= ();

# NOTE(review): the following scalars are declared but not referenced
# elsewhere in the visible source.
my $StatusCode;
my $HOSTNAME;
my $TorrentDB;
my $TempStorePath;
my $BadUrls;
my $StatusFile;
my $pid;

my $StayOnSameHost		= 1;
# Incremented by ScanningPage() for every page it goes on to fetch.
my $AlreadyScannedLinks	= 0;
my $BadUrlCount			= 0;


# Flat file that receives one "CATE=...#DESC=...#LINK=..." record per accepted
# link. It is deleted first, so the appending writers start from an empty file
# on every run.
my $torrentflatfile		= "/root/zoozle/new-bots-X/torrent_test.txt";
unlink $torrentflatfile;

# filedonkey emule

# Listing pages to crawl: page URL => regex (kept as a string) that the URL of
# an extracted link must match in order to be recorded by doScanning().
my %LinksToScanDE = (		# 27 torrents
		"http://pirlog.com/verified.php"			=> '\/torrent\/(\d{1,})',
		"http://pirlog.com/verified.php?pageno=2"	=> '\/torrent\/(\d{1,})',
		"http://pirlog.com/verified.php?pageno=3"	=> '\/torrent\/(\d{1,})',
		"http://pirlog.com/verified.php?pageno=4"	=> '\/torrent\/(\d{1,})',
		"http://pirlog.com/verified.php?pageno=5"	=> '\/torrent\/(\d{1,})',
		
		# Template for a further entry:
		#""	 => '',	
);

# 0-110  (NOTE(review): meaning unclear - presumably the page range of a
# former paging loop; confirm or remove)
# Crawl all configured pages into the flat file ...
Main();

# ... then pass the flat file to AddContentToSqlTORRENT() (not defined in this
# file; judging by its name it imports the records into SQL - TODO confirm).
&AddContentToSqlTORRENT($torrentflatfile);

# Main()
#
# Drives the crawl: every listing page configured in %LinksToScanDE is handed,
# together with its link filter regex, to doScanning(). The pages are
# processed one after another in this process (no forking).
#
# Returns: 1.
sub Main {

	# Iterate over the sorted keys instead of each(): each() walks the hash in
	# a pseudo-random order, which made the "first occurrence wins"
	# de-duplication in doScanning() differ from run to run.
	foreach my $url ( sort keys %LinksToScanDE ) {
		doScanning( $url, $LinksToScanDE{$url}, "en" );
	}

	return 1;

}; # sub Main


# doScanning( $UrltoScan, $Regex, $Lang )
#
# Fetches one listing page via ScanningPage() and appends a record of the form
#     CATE=<Lang>#DESC=<link text>#LINK=<link url>
# to $torrentflatfile (and echoes it to STDOUT) for every extracted link whose
# URL matches $Regex and starts with "http".
#
# Parameters:
#   $UrltoScan - URL of the listing page to fetch.
#   $Regex     - pattern (regex source, matched case-insensitively) that the
#                URL of a link must match to be recorded.
#   $Lang      - value written into the CATE field of each record.
#
# Returns: 1 after the page has been processed, 0 if the output file could
# not be opened (a warning is emitted in that case).
#
# Side effects: records the cleaned-up link texts as keys in the shared
# $AlreadSeenLinks hash so that each text is written at most once per run.
sub doScanning {

	my ( $UrltoScan, $Regex, $Lang ) = @_;

	# Lexical handle, three-argument open, and an explicit check: writing
	# through a handle that failed to open would silently drop every record.
	my $out_fh;
	unless ( open( $out_fh, '>>', $torrentflatfile ) ) {
		warn "doScanning: cannot open '$torrentflatfile' for appending: $!\n";
		return 0;
	}
	$out_fh->autoflush(1);

	# Announce the page before fetching it so the log shows what is in progress.
	print "Scanning \"$UrltoScan\" \n";

	# The "&" call form is kept on purpose: it ignores any prototype declared
	# on ScanningPage, so this call works no matter how that sub is declared.
	my $Links = &ScanningPage($UrltoScan);

	# ScanningPage() returns 0 or nothing for pages it skips; only a reference
	# carries links. Test this explicitly instead of relying on a symbolic
	# dereference of a non-reference under "no strict refs".
	if ( ref $Links ) {

		foreach my $link ( @{$Links} ) {

			# Each entry is split on " ### " into link text, parent URL and
			# link URL; the parent URL is not needed here.
			my ( $LinkText, undef, $UrlToScan1 ) = split( ' ### ', $link );
			next unless defined $UrlToScan1;

			chomp($LinkText);
			chomp($UrlToScan1);

			next unless $UrlToScan1 =~ /$Regex/i && $UrlToScan1 =~ /^http/i;

			# Strip the "&hit=1" query parameter from the link URL.
			$UrlToScan1 =~ s/\&hit=1//gi;

			# Normalize the link text: separator characters become spaces,
			# a digit followed by "/" and any trailing digits are removed,
			# surrounding whitespace is trimmed.
			$LinkText =~ s/(\.|\-|\+|_|�)/ /ig;
			$LinkText =~ s/(\d)\///ig;
			$LinkText =~ s/(\d)+$//ig;
			$LinkText =~ s/^\s+//;
			$LinkText =~ s/\s+$//;

			# Skip link texts that were already recorded.
			next if ( exists $AlreadSeenLinks->{$LinkText} );
			$AlreadSeenLinks->{$LinkText} = 0;

			my $record = "CATE=$Lang#DESC=$LinkText#LINK=$UrlToScan1\n";
			print {$out_fh} $record;
			print $record;

		}; # foreach my $link

	}; # if ( ref $Links )

	# Buffered write errors surface at close time.
	close($out_fh)
		or warn "doScanning: error closing '$torrentflatfile': $!\n";

	return 1;

}; # sub doScanning

# TODO: check whether isohunt still refuses the connection tomorrow
# print &ScanningPage("http://www.zoozle.net/emule-bittorrent-download/gothic,torrent,,0.html",,);
# exit;


# ScanningPage( $UrltoScan [, $ParrentUrl [, $LinkText ]] )
#
# Fetches one page and returns whatever $LINK->HtmlLinkExtractor() extracts
# from it.
#
# Parameters:
#   $UrltoScan  - URL of the page to fetch (required).
#   $ParrentUrl - passed through to the extractor as "PURL"; defaults to "".
#   $LinkText   - passed through to the extractor as "TXT"; defaults to "".
#
# Returns:
#   0                 if no (or a false) URL was given,
#   nothing (undef)   if the URL was already scanned or contains "forum",
#   otherwise the result of HtmlLinkExtractor() - the caller in this file
#   dereferences it as an array of link strings.
#
# Side effects: marks the URL as seen in the shared $AlreadSeenLinks hash,
# overwrites the file-level @StatusCode, increments $AlreadyScannedLinks and
# performs one HEAD and one GET request.
#
# The former empty "()" prototype was dropped: it declared a sub without
# arguments although three are accepted, and only worked because callers
# use the prototype-bypassing "&" call form.
sub ScanningPage {

	my ( $UrltoScan, $ParrentUrl, $LinkText ) = @_;

	return 0 unless $UrltoScan;
	$ParrentUrl	||= "";
	$LinkText	||= "";

	# Do not scan a link twice, and do not scan forum links at all.
	return if exists $AlreadSeenLinks->{$UrltoScan};
	return if $UrltoScan =~ /forum/i;

	# Mark the link as scanned right away: there are several exits further
	# down, and without this a link could end up being spidered many times.
	$AlreadSeenLinks->{$UrltoScan} = 0;

	# NOTE(review): apart from @StatusCode the HEAD result is currently
	# unused, because the validation below is disabled. The request is kept
	# so the observable behavior stays the same.
	my $HeadHashResultRef	= $HEAD->Head( $UrltoScan );

	# Split the status code into single characters so that its first digit
	# can be inspected.
	@StatusCode				= split('', $HeadHashResultRef->{ 'COD' } );

#	# Disabled validation: return when the first digit of the status code is
#	# not 2 or when the 'STA' field of the HEAD result is not 1.
#	if ( $StatusCode[0] != 2 ){
#		print "something is wrong status '$StatusCode[0]'\n";
#		return;
#	} elsif ( $HeadHashResultRef->{ 'STA' } != 1 ){
#		print "something is wrong head '$HeadHashResultRef->{ 'STA' }'\n";
#		return;
#	};

	$AlreadyScannedLinks++;

	my $GetHashResultRef	= $GET->Get( $UrltoScan );

	# Configuration handed to the link extractor. The meaning of the keys is
	# defined by TorrentBot::LINK::ExtractorMem, which is not visible here.
	my $ScanLinkConfig = {
		"OBJ"	=> $GetHashResultRef->{ 'OBJ' },
		"CON"	=> $GetHashResultRef->{ 'CNT' },
		"CDP"	=> 0,
		"SUD"	=> 1,
		"SDT"	=> 0,	# 0=CurrentDept | 1=dirdept
		"URL"	=> $UrltoScan,
		"PURL"	=> $ParrentUrl,
		"TXT"	=> $LinkText,
	};

	return $LINK->HtmlLinkExtractor($ScanLinkConfig);

}; # sub ScanningPage



# getNewsSex()
#
# Downloads http://sextorrent.to/index_in.php, looks for links of the form
# "file.php?id=<id>&name=<name>" and appends one record
#     CATE=de#DESC=<name>#LINK=http://sextorrent.to/<link>
# per link to $torrentflatfile.
#
# Takes no arguments.
#
# Returns: 1 on success, 0 if the output file could not be opened or the page
# could not be downloaded (a warning is emitted in both cases).
sub getNewsSex(){
	use LWP::Simple;

	# Lexical handle, three-argument open, and an explicit check instead of
	# an unchecked bareword handle.
	my $out_fh;
	unless ( open( $out_fh, '>>', $torrentflatfile ) ) {
		warn "getNewsSex: cannot open '$torrentflatfile' for appending: $!\n";
		return 0;
	}

	# get() returns undef when the download fails.
	my $page = get("http://sextorrent.to/index_in.php");
	unless ( defined $page ) {
		warn "getNewsSex: could not download http://sextorrent.to/index_in.php\n";
		close($out_fh);
		return 0;
	}

	# Every chunk starts right after an 'a href="', i.e. with a link target.
	foreach my $chunk ( split( /a href=\"/i, $page ) ) {

		# e.g. http://sextorrent.to/file.php?id=13911&name=Mandy%20on%20Tour
		next unless $chunk =~ /file\.php\?id=/i;

		# The link target is everything up to the closing quote.
		my ( $link ) = split( '"', $chunk );

		# Decode every "&amp;" entity in the target. The previous code
		# removed only the first "amp;" it found.
		$link =~ s/&amp;/&/gi;

		# The description is the value of the "name" query parameter.
		my ( undef, $desc ) = split( /&name=/i, $link );
		chomp($desc) if defined $desc;

		# Skip javascript pseudo links.
		next if $link =~ /java/i;

		# Only record links that carry a usable description.
		if ( defined $desc && $desc =~ /\w/ ) {
			print {$out_fh} "CATE=de#DESC=$desc#LINK=http://sextorrent.to/$link\n";
		};

	}; # foreach my $chunk

	# Buffered write errors surface at close time.
	close($out_fh)
		or warn "getNewsSex: error closing '$torrentflatfile': $!\n";

	return 1;

}; # sub getNewsSex(){


# dead:	"http://dl-torrent.com/"																		=> 'torrents\.php\?mode=details\&id=',					# http://dl-torrent.com/torrents.php?mode=details&id=fee0d8b4588effcf5007ee7ca78f14336697df7a
	# DEAD: "http://francepartage.com/upload/browse.php?page=1"												=> 'details\.php\?id=',									# http://francepartage.com/upload/details.php?id=814

	#	"http://torrentmatrix.com/torrents-search.php?search=french&cat=0&cat=0&incldead=0&inclexternal=0"		=> 'torrents-details\.php\?id=',						# http://torrentmatrix.com/torrents-details.php?id=73347&hit=1
	#	"http://all-torrent.net/"																				=> 'index.php\?option=com_torrenttrader\&action=view',	# http://all-torrent.net/index.php?option=com_torrenttrader&action=view&id=351&Itemid=28
	#	"http://www.torrentportal.com/torrents-search.php?search=francais"										=> '\/details\/',
	#	"http://extratorrent.com/search/?new=1&search=francais&s_cat=0"											=> '\/torrent\/',
	#	"http://thepiratebay.org/search/francais/0/0/0"															=> '\/tor\/',
	#	"http://torrentspy.com/search?query=francais&submit.x=0&submit.y=0"										=> '\/torrent\/',