package de.mrbesen.youtubecrawler;

import java.io.IOException;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

/**
 * Worker that takes video ids from its {@link #todo} queue, hands each one to the native
 * {@code crawl} method and records it in {@link #crawled}. New work is requested from the owning
 * {@link Crawler} via {@code request(this)}; results are collected through {@link #report()}.
 *
 * <p>The collections themselves are plain {@link LinkedList}s and are not synchronized; only the
 * two signalling flags are {@code volatile}.
 */
public class CrawlerThread implements Runnable {

	/** Matches the 11-character video id in a {@code watch?v=...} link (used by the retired Java crawl below). */
	private static final Pattern linkpattern = Pattern.compile("watch\\?v=([-_a-zA-Z0-9]{11})");

	private final Logger log = Logger.getLogger(this.getClass().getName());

	private Crawler parent;
	Thread thread;

	LinkedList<String> todo = new LinkedList<>();//videos, this thread should crawl
	LinkedList<String> crawled = new LinkedList<>();//videos this thread had crawled
	LinkedList<String> found = new LinkedList<>();//videos this thread had found

	// Set to true here before every parent.request(this); it is never reset inside this class,
	// so it is presumably cleared by the Crawler (possibly from another thread - TODO confirm).
	volatile boolean requested = true;//is a request pending?

	// Raised by report() while it swaps the result lists; volatile so the worker loop sees it.
	volatile boolean lockforreport = false;

	private int threadid;

	static {
		// NOTE(review): java.library.path is usually read once at JVM start-up, so extending it
		// here may have no effect on the loadLibrary call below - confirm, or pass
		// -Djava.library.path on the command line instead.
		String libpath = System.getProperty("java.library.path");
		libpath += ":./";
		System.setProperty("java.library.path", libpath);
		System.loadLibrary("crawlerthread");
	}

	/**
	 * Native hook; presumably prepares the native library for {@code threadCount} workers
	 * (implementation not visible here).
	 *
	 * @param threadCount number of crawler threads
	 */
	public static native void initLib(int threadCount);

	/** Native hook; presumably releases what {@link #initLib(int)} set up (implementation not visible here). */
	public static native void deinitLib();

	/**
	 * Creates a worker and immediately asks {@code root} for work.
	 *
	 * @param root     the crawler that owns this worker and supplies its video ids
	 * @param threadid id passed on to the native crawl call
	 */
	public CrawlerThread(Crawler root, int threadid) {
		// finish all field assignments before handing "this" to the parent
		parent = root;
		this.threadid = threadid;
		root.request(this);
	}

	/**
	 * Registers the thread that executes {@link #run()}, so that {@link #report()} can interrupt it.
	 *
	 * @param t the thread running this worker
	 */
	void setThread(Thread t) {
		thread = t;
	}

	/**
	 * @return the live queue of videos that still have to be crawled (not a copy)
	 */
	LinkedList<String> undone() {
		return todo;
	}

	/**
	 * @return number of videos that still have to be crawled
	 */
	int undoneSize() {
		return todo.size();
	}

	/**
	 * Main loop: crawls queued videos while the parent is crawling, requests more work when the
	 * queue runs low, and idles for ten seconds when the queue is empty.
	 */
	@Override
	public void run() {
		while (parent.isCrawling()) {
			while (!todo.isEmpty() && parent.isCrawling()) {
				// Wait until report() has finished swapping the result lists. A single 10 ms
				// sleep is not enough if the swap is still in progress, hence the loop.
				while (lockforreport) {
					try {
						Thread.sleep(10);
					} catch (InterruptedException e) {
						// report() interrupts this thread once it is done
						lockforreport = false;
					}
				}

				String vid = todo.removeFirst();
				// System.out.println("crawling: " + vid + " size: " + found.size());
				crawled.add(vid);
				crawl(vid, threadid);

				if (todo.size() < parent.requestlimit && !requested) {
					requested = true;
					parent.request(this);
				}
			}

			// Crawling was switched off: leave right away instead of logging a misleading
			// warning and sleeping for another ten seconds.
			if (!parent.isCrawling()) {
				break;
			}

			if (todo.isEmpty() && !requested) {
				requested = true;
				parent.request(this);
			}

			log.warn("No Object left!");
			Thread.yield();
			try {
				Thread.sleep(10000);//sleep for 10 seconds
			} catch (InterruptedException ignored) {
				// report() uses interrupts as a wake-up signal; deliberately not re-asserting the
				// flag, otherwise the next sleep would return immediately.
			}
		}
		log.info("Stopped.");
	}

	/**
	 * Hands over the results gathered since the last call and starts fresh, empty result lists.
	 * Interrupts the worker thread afterwards, if one has been registered via
	 * {@link #setThread(Thread)}.
	 *
	 * @return a two-element array: index 0 holds the crawled video ids, index 1 the found video ids
	 */
	@SuppressWarnings("unchecked") // generic array creation; only LinkedList<String> instances are stored
	LinkedList<String>[] report() {
		lockforreport = true;
		LinkedList<String>[] out = new LinkedList[] {crawled, found};
		crawled = new LinkedList<>();
		found = new LinkedList<>();
		lockforreport = false;

		// setThread may not have been called yet
		Thread worker = thread;
		if (worker != null) {
			worker.interrupt();
		}
		return out;
	}

	/*
	 * Former pure-Java implementation, kept for reference:
	 *
	private void crawl(String videoid) {
		try {
			crawled.add(videoid);
			// log.info("crawling: " + videoid);
			HTTPS con = new HTTPS("https://youtube.com/watch?v=" + videoid);
			String s = con.getContent();
			Matcher matcher = linkpattern.matcher(s);
			while(matcher.find()) {
				String ytid = matcher.group(1);
				if(!ytid.equals(videoid)) {
					found.add(ytid);
				}
			}
		} catch(IOException e) {
			e.printStackTrace();
		}
	}
	*/

	/**
	 * Native crawl of a single video. Presumably fills {@link #found} like the retired Java
	 * version above did - verify against the native source.
	 *
	 * @param videid   id of the video to crawl
	 * @param threadid id of this worker
	 */
	private native void crawl(String videid, int threadid);
}