From 87f4bd3fd1ec786180b72b34f17df99de6060e68 Mon Sep 17 00:00:00 2001 From: MrBesen Date: Wed, 14 Nov 2018 11:39:21 +0100 Subject: [PATCH] insertback & DB buffer --- src/de/mrbesen/youtubecrawler/Crawler.java | 21 ++++++++++-- .../mrbesen/youtubecrawler/CrawlerThread.java | 9 ++++++ src/de/mrbesen/youtubecrawler/DB.java | 32 ++++++++++++++----- 3 files changed, 52 insertions(+), 10 deletions(-) diff --git a/src/de/mrbesen/youtubecrawler/Crawler.java b/src/de/mrbesen/youtubecrawler/Crawler.java index 174483d..1a6e372 100644 --- a/src/de/mrbesen/youtubecrawler/Crawler.java +++ b/src/de/mrbesen/youtubecrawler/Crawler.java @@ -72,7 +72,7 @@ public class Crawler implements Runnable { if(toCrawl.contains(videoid)) { ArrayList str = new ArrayList(1); str.add(videoid); - db.storeTemp(str); + db.storeTemp(str, false); } } @@ -269,7 +269,7 @@ public class Crawler implements Runnable { } catch(NoSuchElementException ignored) {//concurrentmodification fuckery log.info("no suchelement bla"); } - db.storeTemp(store); + db.storeTemp(store, false); } log.info(count + " videos added from " + threadname); profiler.endSection(); @@ -324,6 +324,22 @@ public class Crawler implements Runnable { } profiler.endSection();//main } + profiler.startSection("waitforthreads"); + for(CrawlerThread ct : threads) { + try { + ct.thread.join(); + } catch (InterruptedException ignore) {} + } + log.info("All Threads Terminated."); + + profiler.endStartSection("insertback"); + ArrayList putback = new ArrayList<>(threadcount * threads.get(0).undoneSize());//create list with approximated size + for(CrawlerThread ct : threads) { + putback.addAll(ct.undone()); + } + db.storeTemp(putback, true); + profiler.endSection();//insertback + profiler.endSection();//root log.info("Profiler:"); for(String s : profiler.getTreeView()) { @@ -414,3 +430,4 @@ public class Crawler implements Runnable { } } + diff --git a/src/de/mrbesen/youtubecrawler/CrawlerThread.java b/src/de/mrbesen/youtubecrawler/CrawlerThread.java index 20f2032..48c704b 100644 --- a/src/de/mrbesen/youtubecrawler/CrawlerThread.java +++ b/src/de/mrbesen/youtubecrawler/CrawlerThread.java @@ -1,6 +1,7 @@ package de.mrbesen.youtubecrawler; import java.io.IOException; +import java.util.ArrayList; import java.util.LinkedList; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -31,6 +32,14 @@ public class CrawlerThread implements Runnable { thread = t; } + LinkedList undone() { + return todo; + } + + int undoneSize() { + return todo.size(); + } + @Override public void run() { while(parent.isCrawling()) { diff --git a/src/de/mrbesen/youtubecrawler/DB.java b/src/de/mrbesen/youtubecrawler/DB.java index 0898181..7750ced 100644 --- a/src/de/mrbesen/youtubecrawler/DB.java +++ b/src/de/mrbesen/youtubecrawler/DB.java @@ -26,9 +26,13 @@ public class DB implements Runnable { private Server serv = new Server(this); private Thread randomrefill = null; private int dbsize = 0; + private StringBuilder tostorebuffer ; private int writebuffersize = 500; private int writebuffercurrentsize = 0; + + private StringBuilder totempbuffer; + private int writetempbuffercurrentsize = 0; public DB() { try { @@ -69,6 +73,7 @@ public class DB implements Runnable { log.warn("could not read the number \"" + Config.prop.getProperty("db.writebuffersize") + "\" from the config file. db.writebuffersize"); } tostorebuffer = new StringBuilder(writebuffersize); + totempbuffer = new StringBuilder(writebuffersize); } catch (SQLException e) { log.error("Error while connecting to the database! ", e); } @@ -143,15 +148,16 @@ public class DB implements Runnable { } if(writebuffercurrentsize > writebuffersize || force) { if(tostorebuffer.length() > 10) { - log.info("Write data to DB video count: " + writebuffercurrentsize); + log.info("Write databuffer to DB video count: " + writebuffercurrentsize); dbsize += writebuffercurrentsize; tostorebuffer.deleteCharAt(0);//delete leading ',' String qu = "INSERT IGNORE INTO `ytcrawler`.`videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + tostorebuffer.toString(); update(qu); + + //reset buffer + writebuffercurrentsize = 0; + tostorebuffer = new StringBuilder(writebuffersize); } - //reset buffer - writebuffercurrentsize = 0; - tostorebuffer = new StringBuilder(writebuffersize); } } @@ -271,15 +277,25 @@ public class DB implements Runnable { log.info("Delete Double done in " + ((System.currentTimeMillis() - start)/60000) + " min"); } - public void storeTemp(ArrayList strings) { + public void storeTemp(ArrayList strings, boolean force) { if(strings == null) return; if(!strings.isEmpty()) { - StringBuilder sb = new StringBuilder(); + log.info("store Temp to buffer: " + strings.size()); + writetempbuffercurrentsize += strings.size(); for(String s : strings) { - sb.append("'), ('").append(s); + totempbuffer.append(", ('").append(s).append("')"); } - update("INSERT IGNORE INTO `ytcrawler`.`temp` (`ytid`) VALUES ('" + sb.substring(6).toString() + "');"); + } + if(writetempbuffercurrentsize > writebuffersize || force) { + log.info("Write Buffer: " + writetempbuffercurrentsize); + totempbuffer.deleteCharAt(0);//delete leading ',' + String qu = "INSERT IGNORE INTO `ytcrawler`.`temp` (`ytid`) VALUES " + totempbuffer.toString() + ";"; + update(qu); + + //reset + writetempbuffercurrentsize = 0; + totempbuffer = new StringBuilder(writebuffersize); } }