From 4471c0f01dd519bedb96e0965150391675755d1a Mon Sep 17 00:00:00 2001 From: MrBesen Date: Fri, 12 Oct 2018 17:22:36 +0200 Subject: [PATCH] performance improved --- src/de/mrbesen/youtubecrawler/Crawler.java | 67 ++++++++++++------- .../mrbesen/youtubecrawler/CrawlerThread.java | 2 +- src/de/mrbesen/youtubecrawler/DB.java | 44 ++++++++---- src/de/mrbesen/youtubecrawler/Main.java | 12 +++- src/de/mrbesen/youtubecrawler/Profiler.java | 66 +++++++----------- src/de/mrbesen/youtubecrawler/YoutubeAPI.java | 9 +-- 6 files changed, 114 insertions(+), 86 deletions(-) diff --git a/src/de/mrbesen/youtubecrawler/Crawler.java b/src/de/mrbesen/youtubecrawler/Crawler.java index 3914847..5346ac8 100644 --- a/src/de/mrbesen/youtubecrawler/Crawler.java +++ b/src/de/mrbesen/youtubecrawler/Crawler.java @@ -19,6 +19,8 @@ import org.apache.log4j.Logger; public class Crawler implements Runnable { private int jobspeerthread = 100; //the amount of jobs a thread get peer request + int requestlimit = 5;//amount of videos to be left in the todo queue of a thread until it requests new videos + private int idlecount = 5;//amount of idle loops allowed private LinkedList toSave = new LinkedList<>();//all found ytids, witch need to be analysed private LinkedList toCrawl = new LinkedList<>();//all videos tu crawl @@ -31,7 +33,7 @@ public class Crawler implements Runnable { private boolean crawl = true; private int crawlcount = 0; - private int updateOffset = 0; + //private int updateOffset = 0; private DB db = new DB(); private YoutubeAPI api = new YoutubeAPI(); @@ -48,6 +50,16 @@ public class Crawler implements Runnable { log.warn("could not read the number \"" + Config.prop.getProperty("crawler.jobspeerthread") + "\" from the config file. 
maxvideo"); jobspeerthread = 100; } + try { + requestlimit = Integer.parseInt(Config.prop.getProperty("crawler.requestlimit")); + } catch(NumberFormatException e) { + log.warn("could not read the number \"" + Config.prop.getProperty("crawler.requestlimit") + "\" from the config file. crawler.requestlimit"); + } + try { + idlecount = Integer.parseInt(Config.prop.getProperty("crawler.idlecount")); + } catch(NumberFormatException e) { + log.warn("could not read the number \"" + Config.prop.getProperty("crawler.idlecount") + "\" from the config file. crawler.idlecount"); + } } public void stop() { @@ -122,7 +134,7 @@ public class Crawler implements Runnable { e.printStackTrace(); } } - profiler.endStartSection("populateThreads"); + profiler.endStartSection("populateThreads");//loading crawlfile closed //populate threads int threadcount = 4; try { @@ -138,13 +150,14 @@ public class Crawler implements Runnable { threads.add(thr); thr.thread.start(); } - profiler.endStartSection("deleteDouble"); + profiler.endStartSection("deleteDouble");//populate threads long lastdoubledelete = System.currentTimeMillis(); //db.deleteDouble(); + profiler.endSection();//deletedouble profiler.endSection();//startup - profiler.endStartSection("main"); boolean savedall = false;//ein 2. 
durch lauf, um wirklich alles zu speichern while(crawl || savedall) { + profiler.startSection("main"); log.info("to Crawl: " + toCrawl.size() + /*" known: " + toknown.size() +*/ " Time: " + dateform.format(new Date())); if(!crawl) savedall = true; @@ -157,21 +170,25 @@ public class Crawler implements Runnable { } //kindof idle - profiler.endStartSection("idle"); - while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) { - startup = 0;//stop startup count - if((System.currentTimeMillis() - lastdoubledelete) / 1000 > 1800) { - db.deleteDouble(); - lastdoubledelete = System.currentTimeMillis(); - } else { - Thread.yield(); - try { - Thread.sleep(100); - } catch(InterruptedException ignored) { - break; + { + int count = 0;//don't stay too long in idle! + profiler.endStartSection("idle"); + while(toCrawl.size() > (jobspeerthread * threads.size() * 2) && crawl && requested.isEmpty() && count < idlecount) { + count ++; + startup = 0;//stop startup count + if((System.currentTimeMillis() - lastdoubledelete) / 1000 > 1800) { + //db.deleteDouble(); + lastdoubledelete = System.currentTimeMillis(); + } else { + Thread.yield(); + try { + Thread.sleep(100); + } catch(InterruptedException ignored) { + break; + } } + // updateDB(); } - // updateDB(); } //nothing left? if(/*toknown.isEmpty() && */toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon @@ -282,13 +299,13 @@ public class Crawler implements Runnable { } if(videoids.size() > 0) { profiler.startSection("getinfo"); - List