diff --git a/src/de/mrbesen/youtubecrawler/Crawler.java b/src/de/mrbesen/youtubecrawler/Crawler.java index 622a1a5..bc0707c 100644 --- a/src/de/mrbesen/youtubecrawler/Crawler.java +++ b/src/de/mrbesen/youtubecrawler/Crawler.java @@ -137,119 +137,137 @@ public class Crawler implements Runnable { } while(crawl) { log.info("to Crawl: " + toCrawl.size() + " known: " + toknown.size() + " Time: " + dateform.format(new Date())); - - //fullfill request - while(!requested.isEmpty() && !toCrawl.isEmpty() && crawl) { - log.info("fullfill request"); - currentstate = "fullfill requests"; - send(requested.remove(0)); - } - - //kindof idle - while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) { - startup = 0;//stop startup count - currentstate = "idle"; - Thread.yield(); - try { - Thread.sleep(100); - } catch(InterruptedException ignored) { - break; - } - // updateDB(); - } - //nothing left? - if(toknown.isEmpty() && toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon - log.warn("nothing left to crawl"); - crawl = false; - } - - //refil the tocrawl list. - if(!toknown.isEmpty()) { - //check in db for known videos - log.info("Checking the DB"); - currentstate = "get new tocrawl"; - // listlock.writeLock().lock(); - while(toCrawl.size() < jobspeerthread * threads.size() * 2 && crawl && !toknown.isEmpty()) { - LinkedList tocheck = new LinkedList<>(); - for(int i = 0; i < toknown.size() && i < maxvideostotest; i++) { - tocheck.add(toknown.removeFirst()); - } - toCrawl.addAll(db.checkvideos(tocheck)); - } - // listlock.writeLock().unlock(); - } - if(toknown.size() < threadcount * jobspeerthread * 20 && crawl) { - currentstate = "restoretemp"; - log.info("restoreTemp"); - LinkedList rest = db.restoreTemp(); - toknown.addAll(rest); - } - - //writing crawlfile - log.info("Writing Crawlfile"); - currentstate = "writing crawlfile"; - // listlock.writeLock().lock(); try { - PrintWriter p = new PrintWriter(new BufferedWriter(new FileWriter(crawlfile))); - for(String t : toCrawl) { - p.println(t); + //fullfill request + while(!requested.isEmpty() && !toCrawl.isEmpty() && crawl) { + log.info("fullfill request"); + currentstate = "fullfill requests"; + send(requested.remove(0)); } - p.println("-"); - for(String t : toknown) { - p.println(t); - } - p.close(); - } catch (IOException e) { - log.error("Error writing crawlfile.", e); - } - //get reports - currentstate = "get report"; - log.info("get report"); - int count = 0; - for (CrawlerThread crawlerThread : threads) { - LinkedList[] report = crawlerThread.report(); - crawlcount+= report[0].size(); - toSave.addAll(report[0]); - crawlerThread.crawled.clear(); - while(report[1].size() > 0) { - LinkedList store = new LinkedList<>(); + //kindof idle + while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) { + startup = 0;//stop startup count + currentstate = "idle"; + Thread.yield(); try { - while(!report[1].isEmpty() && store.size() < 50) { - store.add(report[1].removeFirst()); - - count++; - } - } catch(NoSuchElementException ignored) {//concurrentmodification fuckery + Thread.sleep(100); + } catch(InterruptedException ignored) { + break; } - db.storeTemp(store); + // updateDB(); + } + //nothing left? + if(toknown.isEmpty() && toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon + log.warn("nothing left to crawl"); + crawl = false; } - log.info(count + " videos added."); - crawlerThread.found.clear(); - } - //save to db - currentstate = "save to DB"; - log.info("save " + toSave.size() + " videos to DB."); - while(!toSave.isEmpty()) { - LinkedList videoids = new LinkedList<>(); - for(int i = 0; i < 50 && !toSave.isEmpty(); i++) { - videoids.add(toSave.remove(0)); + //refil the tocrawl list. + if(!toknown.isEmpty()) { + //check in db for known videos + log.info("Checking the DB"); + currentstate = "get new tocrawl"; + // listlock.writeLock().lock(); + while(toCrawl.size() < jobspeerthread * threads.size() * 2 && crawl && !toknown.isEmpty()) { + LinkedList tocheck = new LinkedList<>(); + for(int i = 0; i < toknown.size() && i < maxvideostotest; i++) { + tocheck.add(toknown.removeFirst()); + } + toCrawl.addAll(db.checkvideos(tocheck)); + } + // listlock.writeLock().unlock(); + } + if(toknown.size() < threadcount * jobspeerthread * 20 && crawl) { + currentstate = "restoretemp"; + log.info("restoreTemp"); + LinkedList rest = db.restoreTemp(); + toknown.addAll(rest); } - if(videoids.size() > 0) { - List