/* * To change this template, choose Tools | Templates * and open the template in the editor. */ package buzzerproxy; /** * * @author Enger * @see: http://www.rgagnon.com/javadetails/java-0639.html */ import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.net.URLDecoder; import java.util.List; import java.util.ArrayList; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpClient; import org.apache.http.client.ResponseHandler; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.BasicResponseHandler; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager; import org.apache.http.protocol.BasicHttpContext; import org.apache.http.protocol.HttpContext; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; /** @see http://hc.apache.org/httpcomponents-client-ga/examples.html */ public class Http { private ArrayList validResultEntrys = new ArrayList(); public Http() { } public ArrayList getValidResults() { return validResultEntrys; } public void setValidResults(String url) { validResultEntrys.add(url); } public boolean isAlive(String urlToGet) throws ClientProtocolException, IOException { boolean isAlive = false; String urlGetMe = null; try { URL url = new URL(urlToGet); URI uri = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), null); System.out.println("URI " + uri.toString() + " is OK"); urlGetMe = uri.toString(); } catch (MalformedURLException e) { System.out.println("URL " + urlToGet + " is a malformed URL"); } catch (URISyntaxException e) { System.out.println("URI " + urlToGet + " is a malformed URL"); } HttpClient client = new DefaultHttpClient(); HttpGet method = new HttpGet(urlGetMe); HttpResponse httpResponse = client.execute(method); int statusCode = httpResponse.getStatusLine().getStatusCode(); if (statusCode == HttpStatus.SC_OK) { isAlive = true; } return isAlive; } // public boolean isAlive(String url){ public String get(String urlToGet) throws ClientProtocolException { HttpClient httpclient = new DefaultHttpClient(); String responseBody = null; String urlGetMe = null; try { URL url = new URL(urlToGet); URI uri = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), null); System.out.println("URI " + uri.toString() + " is OK"); urlGetMe = uri.toString(); } catch (MalformedURLException e) { System.out.println("URL " + urlToGet + " is a malformed URL"); } catch (URISyntaxException e) { System.out.println("URI " + urlToGet + " is a malformed URL"); } try { HttpGet httpget = new HttpGet(urlGetMe); System.out.println("executing request " + httpget.getURI()); // Create a response handler ResponseHandler responseHandler = new BasicResponseHandler(); try { responseBody = httpclient.execute(httpget, responseHandler); } catch (IOException ex) { Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); } System.out.println("----------------------------------------"); System.out.println(responseBody); System.out.println("----------------------------------------"); } finally { // When HttpClient instance is no longer needed, // shut down the connection manager to ensure // immediate deallocation of all system resources httpclient.getConnectionManager().shutdown(); } return responseBody; } // public String get(String url) throws ClientProtocolException { public ArrayList extractLinks(String url) throws IOException { final ArrayList result = new ArrayList(); Document doc = Jsoup.connect(url).get(); Elements links = doc.select("a[href]"); // Elements media = doc.select("[src]"); // Elements imports = doc.select("link[href]"); // href ... for (Element link : links) { result.add(link.attr("abs:href")); } // // img ... // for (Element src : media) { // result.add(src.attr("abs:src")); // } // // js, css, ... // for (Element link : imports) { // result.add(link.attr("abs:href")); // } return result; }// public List extractLinks(String url) throws IOException { public void prepareParallelCheck(ArrayList urisToGet1, String filetyp) { // Create an HttpClient with the ThreadSafeClientConnManager. // This connection manager must be used if more than one thread will // be using the HttpClient. ThreadSafeClientConnManager cm = new ThreadSafeClientConnManager(); cm.setMaxTotal(10000); IO io = new IO(); String[] urisToGet = new String[urisToGet1.size()]; HttpClient httpclient = new DefaultHttpClient(cm); try { // create an array of URIs to perform GETs on // String[] urisToGet = { // "http://hc.apache.org/", // "http://hc.apache.org/httpcomponents-core-ga/", // "http://hc.apache.org/httpcomponents-client-ga/", // "http://svn.apache.org/viewvc/httpcomponents/" // }; int count = 0; String decodedURI = ""; for (String uri : urisToGet1) { try { decodedURI = URLDecoder.decode(uri, "UTF-8"); } catch (UnsupportedEncodingException ex) { Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); } if (io.isValidFileTyp(uri, filetyp)) { System.out.println("VAlidFileTyp:" + decodedURI); urisToGet[count] = decodedURI; count++; } } // for ( String uri : urisToGet1){ //System.exit(1); // create a thread for each URI GetThread[] threads = new GetThread[urisToGet.length]; for (int i = 0; i < threads.length; i++) { HttpGet httpget = new HttpGet(urisToGet[i]); threads[i] = new GetThread(httpclient, httpget, i + 1); } // start the threads for (int j = 0; j < threads.length; j++) { threads[j].start(); } // join the threads for (int j = 0; j < threads.length; j++) { try { threads[j].join(); } catch (InterruptedException ex) { Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); } } } finally { // When HttpClient instance is no longer needed, // shut down the connection manager to ensure // immediate deallocation of all system resources httpclient.getConnectionManager().shutdown(); } } // public void prepareParallelCheck(ArrayList urisToGet1 ){ /** * A thread that performs a GET. */ static class GetThread extends Thread { private final HttpClient httpClient; private final HttpContext context; private final HttpGet httpget; private final int id; Http h = new Http(); public GetThread(HttpClient httpClient, HttpGet httpget, int id) { this.httpClient = httpClient; this.context = new BasicHttpContext(); this.httpget = httpget; this.id = id; } /** * Executes the GetMethod and prints some status information. */ @Override public void run() { System.out.println(id + " - about to get something from " + httpget.getURI()); try { String url = httpget.getURI().toString(); boolean isAlive = false; try { isAlive = h.isAlive(url); } catch (ClientProtocolException ex) { Logger.getLogger(GoogleResultParser.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(GoogleResultParser.class.getName()).log(Level.SEVERE, null, ex); } if (isAlive) { h.setValidResults(url); } // // execute the method // HttpResponse response = httpClient.execute(httpget, context); // // System.out.println(id + " - get executed"); // // get the response body as an array of bytes // HttpEntity entity = response.getEntity(); // if (entity != null) { // byte[] bytes = EntityUtils.toByteArray(entity); // System.out.println(id + " - " + bytes.length + " bytes read"); // } } catch (Exception e) { httpget.abort(); System.out.println(id + " - error: " + e); } } // public void run() { } // static class GetThread extends Thread { } // public class Http { // public boolean isAlive(String url) { // // if (url == null || !(url instanceof String)) { // Exception e = new Exception(); // Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, e); // System.exit(1); // } // if ( str == null || !(str instanceof String)){ // // URL url1 = null; // try { // url1 = new URL(url); // } catch (MalformedURLException ex) { // Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); // } // try { // url1 = new URL(url); // } catch (MalformedURLException ex) { // Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); // } // HttpURLConnection code = null; // try { // code = (HttpURLConnection) url1.openConnection(); // } catch (IOException ex) { // Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); // } // try { // System.out.println(code.getResponseCode()); // } catch (IOException ex) { // Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); // } // return true; // } // public boolean isAlive(String url){