package de.mrbesen.youtubecrawler; import java.io.IOException; import java.util.LinkedList; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.log4j.Logger; public class CrawlerThread implements Runnable { private static Pattern linkpattern = Pattern.compile("watch\\?v=([-_a-zA-Z0-9]{11})"); private Logger log = Logger.getLogger(this.getClass().getName()); private Crawler parent; Thread thread; LinkedList todo = new LinkedList<>();//videos, this thread should crawl List crawled = new LinkedList<>();//videos this thread had crawled List found = new LinkedList<>();//videos this thread had found static AtomicInteger fails = new AtomicInteger(0); private static int MAXFAILS = 100; boolean requested = true;//is a request pending? private int threadid; static { String libpath = System.getProperty("java.library.path"); libpath += ":./"; System.setProperty("java.library.path", libpath); System.loadLibrary("crawlerthread"); } public static native void initLib(int threadCount); public static native void deinitLib(); public CrawlerThread( Crawler root, int threadid) { parent = root; root.request(this); this.threadid = threadid; } void setThread(Thread t) { thread = t; } LinkedList undone() { return todo; } int undoneSize() { return todo.size(); } @Override public void run() { while(parent.isCrawling()) { synchronized (this) { while (!todo.isEmpty() && parent.isCrawling()) { String vid = todo.removeFirst(); // System.out.println("crawling: " + vid + " size: " + found.size()); crawled.add(vid); boolean success = crawl(vid, threadid); if (todo.size() < parent.requestlimit && !requested) { requested = true; parent.request(this); } if (!success) { int val = fails.addAndGet(1); if (val > MAXFAILS) { System.err.println("Max Crawlfails reached, stopping"); parent.stop(); break; } } } if (todo.isEmpty() && !requested) { requested = true; parent.request(this); } log.warn("No Object left!"); Thread.yield(); try { Thread.sleep(10000);//sleep for 10 seconds } catch (InterruptedException ignored) { } } } log.info("Stopped."); } /** * returns a list of all crawled videos * @return */ List[] report() { synchronized (this) { List[] out = new List[]{crawled, found}; crawled = new LinkedList<>(); found = new LinkedList<>(); return out; } } /* private void crawl(String videoid) { try { crawled.add(videoid); // log.info("crawling: " + videoid); HTTPS con = new HTTPS("https://youtube.com/watch?v=" + videoid); String s = con.getContent(); Matcher matcher = linkpattern.matcher(s); while(matcher.find()) { String ytid = matcher.group(1); if(!ytid.equals(videoid)) { found.add(ytid); } } } catch(IOException e) { e.printStackTrace(); } } */ // returns false when it fails private native boolean crawl(String videid, int threadid); }