use HTML::LinkExtractor;
use LWP::Simple; #     use LWP::Simple qw( get );
use Data::Dumper;
use HTML::Strip;

use strict;
use warnings;

# Fetch one juracafe.de page, pull out every link whose href contains both
# "idcat" and "idart", reduce the link markup to plain text, and append each
# text that looks like a single word (no hyphen, slash or whitespace, at
# least three characters) to a word list, one entry per line.

my $base = 'http://www.juracafe.de/cms/front_content.php?idart=149';

# The list is written relative to the current working directory. The
# original script also assigned an absolute "I:\\lexi.cafe.txt" path to a
# variable it never used; that dead variable has been removed.
my $out_file = 'lexi.cafe.txt';

# LWP::Simple::get returns undef on any failure, so stop here instead of
# parsing nothing and silently producing no output.
my $html = get($base);
defined $html
    or die "Could not fetch $base\n";

my $LX = HTML::LinkExtractor->new();
my $hs = HTML::Strip->new();

$LX->parse(\$html);

my @terms;

LINK:
for my $Link ( @{ $LX->links } ) {
    my $href = $Link->{href};
    my $raw  = $Link->{_TEXT};

    # Only links that carry both CMS parameters are of interest; entries
    # without an href or without link text cannot yield a term.
    next LINK unless defined $href && defined $raw;
    next LINK unless $href =~ m/idcat/ && $href =~ m/idart/;

    # _TEXT is passed through HTML::Strip to drop any markup it contains.
    # eof resets the stripper so the next link is parsed as a fresh document.
    my $clean_text = $hs->parse($raw);
    $hs->eof;
    next LINK unless defined $clean_text;

    $clean_text = trim($clean_text);

    # Keep single words only: reject anything containing a hyphen, a slash
    # or whitespace, and anything shorter than three characters.
    next LINK if $clean_text =~ m{[-/\s]};
    next LINK if length($clean_text) < 3;

    push @terms, $clean_text;
}

# Append all collected terms with a single open/close instead of reopening
# the file once per link. Nothing is created when no term was found.
# NOTE(review): get() hands back undecoded bytes; writing them through a
# UTF-8 layer is only correct if the page is served as Latin-1 - confirm.
if (@terms) {
    open my $out, '>>:encoding(UTF-8)', $out_file
        or die "Cannot open $out_file for appending: $!";
    print {$out} "$_\n" for @terms;
    close $out
        or die "Cannot close $out_file: $!";
}

# Return a copy of the first argument with leading whitespace removed.
sub ltrim {
    my ($text) = @_;
    $text =~ s/^\s+//;
    return $text;
}
# Return a copy of the first argument with trailing whitespace removed.
sub rtrim {
    my ($text) = @_;
    $text =~ s/\s+$//;
    return $text;
}
# Return a copy of the first argument with whitespace removed from both ends.
sub trim {
    my ($text) = @_;
    $text =~ s/^\s+|\s+$//g;
    return $text;
}