use HTML::LinkExtractor;
use LWP::Simple; #     use LWP::Simple qw( get );
use Data::Dumper;
use HTML::Strip;

# Driver: visit the lexicon index page of every letter A..Z and hand each
# page URL to main(), which harvests the terms linked from it.
# The letter "X" is skipped on purpose (presumably the site has no index
# page for it -- TODO confirm).
my @letters = ('A' .. 'Z');

for my $letter (@letters) {
	# Skip before building the URL so no work is done for excluded letters.
	next if $letter eq 'X';

	my $base1 = "http://www.juraforum.de/lexikon/$letter";
	print "$base1\n";    # progress output: one line per page visited
	main($base1);
}

# main($base)
#
# Downloads the page at $base, looks at every link whose href contains
# "lexikon/" followed by an ASCII letter, strips the markup from the link
# text and appends each usable term to the file lexi.forum.txt (UTF-8,
# one term per line).
#
# A term is "usable" when, after trimming, it is at least three characters
# long and contains no dash, slash or whitespace (i.e. it is a single word).
#
# Parameters:
#   $base - URL of the index page to scan.
#
# Returns nothing. Warns and returns early when the page cannot be fetched.
# Dies when the output file cannot be opened, written or closed.
sub main {
	my $base = shift;

	# LWP::Simple::get yields undef on any fetch failure; parsing undef
	# would be meaningless, so report it and move on to the next page.
	my $html = get($base);
	unless (defined $html) {
		warn "Could not fetch $base\n";
		return;
	}

	my $LX = HTML::LinkExtractor->new();
	my $hs = HTML::Strip->new();

	$LX->parse(\$html);

	my @terms;
	for my $Link (@{ $LX->links }) {
		# Not every extracted link carries an href (e.g. tags that link
		# through another attribute), so guard before matching.
		my $href = $$Link{href};
		next unless defined $href and $href =~ m{lexikon/[A-Za-z]};

		# The link text may still contain markup; strip it, then reset
		# the stripper so the next link is parsed as a fresh document.
		my $raw_text   = defined $$Link{_TEXT} ? $$Link{_TEXT} : '';
		my $clean_text = trim($hs->parse($raw_text));
		$hs->eof;

		# Keep single-word terms only.
		next if length($clean_text) < 3;
		next if $clean_text =~ m{[-/\s]};

		push @terms, $clean_text;
	}

	# Nothing harvested: leave the output file untouched.
	return unless @terms;

	# Open the output once per page instead of once per term.
	my $file = 'lexi.forum.txt';
	open(my $out, '>>:utf8', $file) or die "Cannot open $file: $!";
	for my $term (@terms) {
		print {$out} "$term\n" or die "Cannot write to $file: $!";
	}
	# Buffered write errors only surface at close, so check it as well.
	close($out) or die "Cannot close $file: $!";

	return;
}#main

# ltrim($text)
# Returns a copy of $text with all leading whitespace removed.
# The caller's value is not modified.
sub ltrim {
	(my $text = shift) =~ s/^\s+//;
	return $text;
}
# rtrim($text)
# Returns a copy of $text with all trailing whitespace removed.
# The caller's value is not modified.
sub rtrim {
	(my $text = shift) =~ s/\s+$//;
	return $text;
}
# trim($text)
# Returns a copy of $text with both leading and trailing whitespace
# removed. The caller's value is not modified.
sub trim {
	(my $text = shift) =~ s/^\s+|\s+$//g;
	return $text;
}