#!/usr/bin/perl -Imodules

#########################################
##### Author:		Sebastian Enger / B.Sc
##### CopyRight:	Sebastian Enger
##### LastModified	16.07.2006
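##### Function:		Scans the RSS feed URLs listed in xmlfeeds/*.txt, writes the extracted torrent links to a flat file, copies that file to the mirror hosts and loads it into MySQL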
##### Todo:			
########################################

#############
###### run from path: /root/zoozle/indexer/GenerellSpider
#############

# sleep 1000 && run / check for new torrents every 21 minutes


# perl -MCPAN -e 'force install "XML::TreePP"'
# perl -MCPAN -e 'force install "APR::URI"'
# perl -MCPAN -e 'force install "Crypt::SSLeay"'

# Clear the terminal before any progress output is printed.
system('clear');

use strict;
use IO::Handle;
use Data::Dumper;
use Net::SCP::Expect;
use TorrentIndexer::HTTP::Get;
use TorrentIndexer::HTTP::Head;
use TorrentIndexer::LINK::Extractor;


###########
### Initialize the helper objects
###########

# All three helpers come from project modules under TorrentIndexer::* and are
# constructed without arguments.
#   $HEAD - not referenced again in the visible part of this script
#   $GET  - used by Scanner() to fetch a feed URL
#   $LINK - used by Scanner() to run XmlLinkExtractor on the fetched response
my $HEAD		= TorrentIndexer::HTTP::Head->new();
my $GET			= TorrentIndexer::HTTP::Get->new();
my $LINK		= TorrentIndexer::LINK::Extractor->new();

#########
### MAIN
#########

# Working directory for this run and the two files kept inside it:
#   $TorrentStoreFlatFile - accumulated "CATE=...#DESC=...#LINK=..." records
#   $LinksToScan          - scratch file passed to the link extractor per feed URL
my $CachePath				= "/root/zoozle/indexer/torrentindexer/xmlfeeds/cache";
my $TorrentStoreFlatFile	= "$CachePath/rss-torrent-feeds.txt";
my $LinksToScan				= "$CachePath/tmp-rss-feed.txt";

# Create the cache directory only when it is missing, and report a failure
# instead of silently continuing with a directory nothing can be written to.
unless ( -d $CachePath ) {
	mkdir $CachePath or warn "Cannot create cache directory $CachePath: $!\n";
}

# Every run starts from empty files. A leftover that cannot be removed would
# carry stale records into this run, so report it.
for my $StaleFile ( $LinksToScan, $TorrentStoreFlatFile ) {
	next unless -e $StaleFile;
	unlink $StaleFile or warn "Cannot remove stale file $StaleFile: $!\n";
}


# Feed list files, in a fixed order. Each file is stored in %RSSFeeds under
# its 1-based position in this list, so the ORDER IS SIGNIFICANT: the main
# loop gives key 25 (bitreactor) the "de" category and everything else "en".
# Keep bitreactor in slot 25 when adding or removing entries.
my @FeedListNames = qw(
	snarf
	mybittorrent
	fenopy
	demonoid
	bush
	torrentportal
	torrentreactor
	piratebay
	bt-chat
	torrentloco
	btjunkie
	fullddl
	matrix
	spy
	2torrent
	bitnova
	meganova
	mininova
	mono
	newt
	torrentbox
	torrentvalley
	isohunt
	legal
	bitreactor
);

# position (1..25) => relative path of the feed list file
my %RSSFeeds = map {
	( $_ + 1 => "xmlfeeds/$FeedListNames[$_].txt" )
} 0 .. $#FeedListNames;


# Walk every feed list file, scan each URL it names and append the links
# found to the flat file.
#   - Keys are visited in numeric order so that runs are reproducible.
#   - Feed list 25 is tagged "de"; every other list is tagged "en".
for my $key ( sort { $a <=> $b } keys %RSSFeeds ) {

	my $val			= $RSSFeeds{$key};
	my $category	= ( $key == 25 ) ? "de" : "en";

	# A missing or unreadable list file must not stop the remaining feeds.
	my $FeedList;
	unless ( open( $FeedList, '<', $val ) ) {
		warn "Cannot read feed list $val: $!\n";
		next;
	}

	while ( my $Link = <$FeedList> ) {

		next if ( $Link =~ /^#/ );		# commented-out feed

		# Strip ALL whitespace (line ending, stray CR, padding). A plain
		# s/\s// would remove only the first whitespace character.
		$Link =~ s/\s+//g;

		# Only lines that look like a URL are scanned. The flat file is
		# built only after a scan, because without one there is no scratch
		# file for GenerateFlatFile to read.
		next unless ( $Link =~ /^(http|https|ftp|ed2k)/i );

		#print "Scanne $Link - $category\n";

		&Scanner( $Link );
		&GenerateFlatFile( $LinksToScan, $category );

	} # while ( my $Link = <$FeedList> )

	close $FeedList;

} # for my $key


# Final stage, in this order: the finished flat file is first copied to the
# mirror hosts, then the same file is loaded into the local SQL database.
&SwarmFilesToSlaves();						# copy the new flat file to the mirrors
&AddContentToSql($TorrentStoreFlatFile);	# add the content to the SQL database

# Timestamped completion message on STDOUT.
my $cur = localtime();
print "[$cur] Done Adding Content\n";


# GenerateFlatFile( $File, $cat )
#
# Appends the records of the scratch file $File to the flat file
# $TorrentStoreFlatFile (file-level variable), tagging each record with the
# category $cat, then deletes $File.
#
#   input line :  "<description> ### <parent link> ### <link>"
#   output line:  "CATE=<cat>#DESC=<description>#LINK=<link>"
#
# Prints the number of records written to STDOUT.
# Returns 1 after processing, or nothing when the flat file cannot be opened.
#
# NOTE(review): input is read as raw bytes while output goes through the
# :utf8 layer. If the scratch file is already UTF-8 encoded this would
# double-encode non-ASCII text. Confirm what XmlLinkExtractor writes.
sub GenerateFlatFile {

	my ( $File, $cat ) = @_;

	my $count = 0;

	# 3-arg open with lexical handles: the file name can never be read as an
	# open mode, and the handles cannot clash with other bareword handles.
	my $FlatFile;
	unless ( open( $FlatFile, '>>', $TorrentStoreFlatFile ) ) {
		warn "Cannot append to $TorrentStoreFlatFile: $!\n";
		unlink $File;		# the scratch file is discarded on every path
		return;
	}
	$FlatFile->autoflush(1);
	$FlatFile->blocking(0);
	binmode $FlatFile, ":utf8";
	flock( $FlatFile, 2 )	# 2 == LOCK_EX
		or warn "Cannot lock $TorrentStoreFlatFile: $!\n";

	if ( open( my $Scratch, '<', $File ) ) {

		while ( my $Line = <$Scratch> ) {
			chomp $Line;

			# The parent link (second field) is not part of the output.
			my ( $LinkDesc, undef, $LinkName ) = split( / ### /, $Line );

			# Blank or truncated lines have no link; writing them would
			# produce empty "LINK=" records.
			next unless ( defined $LinkName and length $LinkName );

			print {$FlatFile} "CATE=$cat#DESC=$LinkDesc#LINK=$LinkName\n";
			$count++;
		}
		close $Scratch;

	}
	else {
		warn "Cannot read $File: $!\n";
	}

	# Buffered write errors surface at close time.
	close $FlatFile or warn "Cannot close $TorrentStoreFlatFile: $!\n";

	unlink $File;

	print "[$count] Torrent Files\n";
	return 1;

} # sub GenerateFlatFile



# Scanner( $CurrentUrlToScan )
#
# Fetches $CurrentUrlToScan through the file-level $GET object and passes the
# result to $LINK->XmlLinkExtractor together with the scratch file path
# $LinksToScan. Prints the URL being scanned to STDOUT.
#
# Always returns 1.
#
# Declared without a prototype: the sub takes one argument, which an empty
# "()" prototype would contradict.
sub Scanner {

	my ($CurrentUrlToScan) = @_;

	print "SCANNE: '$CurrentUrlToScan'\n";

	# The result of Get() is used as a hash reference with the keys 'OBJ' and
	# 'CNT' (presumably the response object and the response content).
	# NOTE(review): what Get() returns on a failed request is not visible
	# here. Confirm that the extractor copes with missing values.
	my $GetHashResultRef = $GET->Get($CurrentUrlToScan);

	# The extractor takes the same two values under the keys 'OBJ' and 'CON'.
	$LINK->XmlLinkExtractor(
		{
			"OBJ" => $GetHashResultRef->{ 'OBJ' },
			"CON" => $GetHashResultRef->{ 'CNT' },
		},
		$LinksToScan
	);

	return 1;

} # sub Scanner



# SwarmFilesToSlaves()
#
# Copies the flat file $TorrentStoreFlatFile (file-level variable) by scp to
# every configured mirror host, then prints a timestamped confirmation.
# Takes no arguments.
#
# NOTE(review): the mirror login data is hard-coded below. Consider moving it
# out of the source file.
sub SwarmFilesToSlaves(){

	# One hash per mirror host.
	my @Mirrors = (
		{	# fishwarez
			"HOST"	=> "85.214.66.15",
			"USER"	=> "root",
			"PASS"	=> "######################!1962",
			"PATH"	=> "/root/zoozle/updates/",
		},
		# Template for a further mirror:
		#	{
		#		"HOST"	=> "HOSTIP",
		#		"USER"	=> "USERNAME",
		#		"PASS"	=> "PASSWORD",
		#		"PATH"	=> "/root/.mutella",	# where the file is to be stored
		#	},
	);

	# Copy from this machine to each mirror in turn.
	foreach my $Mirror (@Mirrors) {

		my ( $Host, $User, $Pass, $Path ) = @{$Mirror}{ qw(HOST USER PASS PATH) };

		# print "SCP: $User $Pass $Host $Path\n";

		# A fresh connection object per mirror; it is released at the end
		# of each iteration.
		my $Copier = Net::SCP::Expect->new(
			host		=> $Host,
			user		=> $User,
			password	=> $Pass,
		);

		$Copier->auto_yes(1);
		$Copier->scp( $TorrentStoreFlatFile, $User . '@' . $Host . ':' . $Path );

	} # foreach my $Mirror

	my $now_string = localtime;
	print "$now_string: I swarmed the files to slaves\n";

}; # sub SwarmFilesToSlaves(){




# AddContentToSql( $filename )
#
# Reads the flat file $filename, whose lines have the form
#     CATE=<category>#DESC=<description>#LINK=<link>
# and inserts every usable record, stamped with today's date, into the MySQL
# tables torrent7, torrent_news3 and torrent_news7.
#
# Skipped: lines not starting with "CATE=", lines containing
# "Torrent does not exist", records without a link, and links containing
# "megawerbung".
#
# Prints the number of processed records to STDOUT.
# Returns 1 when the file was processed, nothing when the database connection
# or the file could not be opened.
#
# NOTE(review): the database credentials are hard-coded below. Consider
# moving them out of the source file.
sub AddContentToSql {

	my ($filename)	= @_;
	my $date		= &getDATE();

	use DBI();

	my $DBHOST		= "localhost";
	my $DBNAME		= "zoozle";
	my $DBUSER		= "zoozle";
	my $DBPASS		= "zoozle23!99";

	# connect() loads the mysql driver itself, so no separate install_driver
	# call is needed.
	my $dbh = DBI->connect( "DBI:mysql:database=$DBNAME;host=$DBHOST", $DBUSER, $DBPASS, { 'RaiseError' => 0 } );
	unless ($dbh) {
		warn "Cannot connect to database $DBNAME on $DBHOST: $DBI::errstr\n";
		return;
	}

	my $Records;
	unless ( open( $Records, '<', $filename ) ) {
		warn "$! - $filename \n";
		$dbh->disconnect;
		return;
	}

	# The values come from remote feeds, so they are passed as bind values
	# and never interpolated into the SQL text. Each statement takes, in
	# order: date, category, description, link.
	my @InsertStatements = (
		q{INSERT DELAYED INTO `torrent7` ( `DATE` , `CATG` , `DESC` , `LINK` ) VALUES ( ?, ?, ?, ? )},
		q{INSERT DELAYED INTO `torrent_news3` ( `UNID`, `DATE` , `CATG` , `DESC` , `LINK` ) VALUES ( '', ?, ?, ?, ? )},
		q{INSERT DELAYED INTO `torrent_news7` ( `DATE` , `CATG` , `DESC` , `LINK` ) VALUES ( ?, ?, ?, ? )},
	);

	my $count = 0;

	# Read line by line instead of slurping the whole file into a list.
	while ( my $Line = <$Records> ) {

		chomp $Line;

		next if ( $Line !~ /^CATE=/ );
		next if ( $Line =~ /Torrent does not exist/i );

		# Parse by field markers rather than splitting on '#', so a '#'
		# inside the description or the URL does not shift the fields.
		my ( $cat, $desc, $link ) = $Line =~ /\ACATE=(.*?)#DESC=(.*)#LINK=(.*)\z/s;

		next unless ( defined $link and length $link );
		next if ( $link =~ /megawerbung/ );

		# Replace HTML entities with a blank BEFORE deleteSpecialChars runs:
		# that routine removes '&', '#' and ';', after which an entity can
		# no longer be recognised.
		$desc =~ s/&+#*\w+;/ /g;

		$cat	= &deleteSpecialChars($cat);
		$desc	= &deleteSpecialChars($desc);
		$count++;

		foreach my $Statement (@InsertStatements) {
			$dbh->do( $Statement, undef, $date, $cat, $desc, $link );
		}

	} # while ( my $Line = <$Records> )

	print "Done reading $filename with $count Entries\n";

	close $Records;
	$dbh->disconnect;

	return 1;

} # sub AddContentToSql


# deleteSpecialChars( $text )
#
# Returns a copy of $text with punctuation and markup characters removed or
# replaced by blanks, and "ß" written as "ss". An undefined argument yields
# the empty string.
#
# The rules run strictly in the order listed. Multi-character tokens are
# handled first, because the single-character rules further down delete the
# '&', '#', ';' and '=' those tokens are made of.
sub deleteSpecialChars {

	my ($del_badchar) = @_;
	$del_badchar = '' unless defined $del_badchar;

	for ($del_badchar) {

		# --- multi-character tokens, while their delimiters still exist ---
		s/&+#+\d+;/ /g;		# numeric entities such as "&#39;"
		s/=&szlig;=/ss/g;	# NOTE(review): pattern kept as written; should a plain "&szlig;" be mapped too?
		s/&nbsp//g;
		s/nbsp//g;

		# --- quotes become blanks; '?' becomes '+', which turns into a blank below ---
		s/[`']/ /g;
		s/\?/+/g;

		# --- characters removed outright ---
		s/%//g;
		s/\$//g;			# literal dollar sign; an unescaped $ is only the end-of-string anchor
		s/§//g;
		s/[!&]//g;
		s/[{}()\[\]]//g;
		s/[=\\#,;|]//g;
		s/[<>]/ /g;			# angle brackets become blanks
		s{/}{}g;
		s/°//g;
		s/\^//g;
		s/ß/ss/g;

		# --- separators become blanks ---
		s/[.\-]/ /g;
		s/ g / /g;
		s/nbsp/ /g;			# an "nbsp" that only formed after the deletions above
		s/[+*]/ /g;
	}

	return ($del_badchar);

} # sub deleteSpecialChars


# getDATE()
#
# Returns the current local date as "YYYY-MM-DD", with month and day padded
# to two digits. Takes no arguments.
sub getDATE() {

	# localtime slots 3, 4 and 5: day of month, zero-based month, and years
	# since 1900.
	my ( $DayOfMonth, $MonthIndex, $YearsSince1900 ) = ( localtime(time) )[ 3, 4, 5 ];

	return sprintf( "%d-%02d-%02d", $YearsSince1900 + 1900, $MonthIndex + 1, $DayOfMonth );

}; # sub getDATE() {

