insertback & DB buffer

This commit is contained in:
MrBesen 2018-11-14 11:39:21 +01:00
parent bb46e9a5e6
commit 87f4bd3fd1
Signed by untrusted user: MrBesen
GPG Key ID: 596B2350DCD67504
3 changed files with 52 additions and 10 deletions

View File

@ -72,7 +72,7 @@ public class Crawler implements Runnable {
if(toCrawl.contains(videoid)) {
ArrayList<String> str = new ArrayList<String>(1);
str.add(videoid);
db.storeTemp(str);
db.storeTemp(str, false);
}
}
@ -269,7 +269,7 @@ public class Crawler implements Runnable {
} catch(NoSuchElementException ignored) {//concurrentmodification fuckery
log.info("no suchelement bla");
}
db.storeTemp(store);
db.storeTemp(store, false);
}
log.info(count + " videos added from " + threadname);
profiler.endSection();
@ -324,6 +324,22 @@ public class Crawler implements Runnable {
}
profiler.endSection();//main
}
profiler.startSection("waitforthreads");
for(CrawlerThread ct : threads) {
try {
ct.thread.join();
} catch (InterruptedException ignore) {}
}
log.info("All Threads Terminated.");
profiler.endStartSection("insertback");
ArrayList<String> putback = new ArrayList<>(threadcount * threads.get(0).undoneSize());//create list with approximated size
for(CrawlerThread ct : threads) {
putback.addAll(ct.undone());
}
db.storeTemp(putback, true);
profiler.endSection();//insertback
profiler.endSection();//root
log.info("Profiler:");
for(String s : profiler.getTreeView()) {
@ -414,3 +430,4 @@ public class Crawler implements Runnable {
}
}

View File

@ -1,6 +1,7 @@
package de.mrbesen.youtubecrawler;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -31,6 +32,14 @@ public class CrawlerThread implements Runnable {
thread = t;
}
/**
 * Hands out the ids this thread still holds in its todo list.
 *
 * The list is returned as-is (no copy is made), so changes made by the
 * caller are visible to this thread and vice versa.
 *
 * @return the thread's own todo list
 */
LinkedList<String> undone() {
    final LinkedList<String> remaining = todo;
    return remaining;
}
/**
 * Reports how many entries are currently in this thread's todo list.
 *
 * @return the current size of the todo list
 */
int undoneSize() {
    final int remaining = todo.size();
    return remaining;
}
@Override
public void run() {
while(parent.isCrawling()) {

View File

@ -26,9 +26,13 @@ public class DB implements Runnable {
private Server serv = new Server(this);
private Thread randomrefill = null;
private int dbsize = 0;
private StringBuilder tostorebuffer ;
private int writebuffersize = 500;
private int writebuffercurrentsize = 0;
private StringBuilder totempbuffer;
private int writetempbuffercurrentsize = 0;
public DB() {
try {
@ -69,6 +73,7 @@ public class DB implements Runnable {
log.warn("could not read the number \"" + Config.prop.getProperty("db.writebuffersize") + "\" from the config file. db.writebuffersize");
}
tostorebuffer = new StringBuilder(writebuffersize);
totempbuffer = new StringBuilder(writebuffersize);
} catch (SQLException e) {
log.error("Error while connecting to the database! ", e);
}
@ -143,15 +148,16 @@ public class DB implements Runnable {
}
if(writebuffercurrentsize > writebuffersize || force) {
if(tostorebuffer.length() > 10) {
log.info("Write data to DB video count: " + writebuffercurrentsize);
log.info("Write databuffer to DB video count: " + writebuffercurrentsize);
dbsize += writebuffercurrentsize;
tostorebuffer.deleteCharAt(0);//delete leading ','
String qu = "INSERT IGNORE INTO `ytcrawler`.`videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + tostorebuffer.toString();
update(qu);
//reset buffer
writebuffercurrentsize = 0;
tostorebuffer = new StringBuilder(writebuffersize);
}
//reset buffer
writebuffercurrentsize = 0;
tostorebuffer = new StringBuilder(writebuffersize);
}
}
@ -271,15 +277,25 @@ public class DB implements Runnable {
log.info("Delete Double done in " + ((System.currentTimeMillis() - start)/60000) + " min");
}
public void storeTemp(ArrayList<String> strings) {
public void storeTemp(ArrayList<String> strings, boolean force) {
if(strings == null)
return;
if(!strings.isEmpty()) {
StringBuilder sb = new StringBuilder();
log.info("store Temp to buffer: " + strings.size());
writetempbuffercurrentsize += strings.size();
for(String s : strings) {
sb.append("'), ('").append(s);
totempbuffer.append(", ('").append(s).append("')");
}
update("INSERT IGNORE INTO `ytcrawler`.`temp` (`ytid`) VALUES ('" + sb.substring(6).toString() + "');");
}
if(writetempbuffercurrentsize > writebuffersize || force) {
log.info("Write Buffer: " + writetempbuffercurrentsize);
totempbuffer.deleteCharAt(0);//delete leading ','
String qu = "INSERT IGNORE INTO `ytcrawler`.`temp` (`ytid`) VALUES " + totempbuffer.toString() + ";";
update(qu);
//reset
writetempbuffercurrentsize = 0;
totempbuffer = new StringBuilder(writebuffersize);
}
}