forked from MrBesen/YoutubeCrawler
Merge branch 'master' of ssh://git.mrbesen.de:2222/MrBesen/YoutubeCrawler
This commit is contained in:
commit
d56f1271c4
|
@ -70,9 +70,9 @@ public class Crawler implements Runnable {
|
|||
//if(! (toCrawl.contains(videoid) || toknown.contains(videoid)))
|
||||
//toknown.add(videoid);
|
||||
if(toCrawl.contains(videoid)) {
|
||||
LinkedList<String> str = new LinkedList<String>();
|
||||
ArrayList<String> str = new ArrayList<String>(1);
|
||||
str.add(videoid);
|
||||
db.storeTemp(str);
|
||||
db.storeTemp(str, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -255,23 +255,21 @@ public class Crawler implements Runnable {
|
|||
|
||||
int count = 0;
|
||||
while(report[1].size() > 1) {//2 videos werden ggf. gelöscht ohne gesehen zu werden.
|
||||
LinkedList<String> store = null;
|
||||
ArrayList<String> store = null;
|
||||
try {
|
||||
if(report[1].size() <= 50) {
|
||||
store = report[1];
|
||||
store = new ArrayList<>(report[1]);
|
||||
count += report[1].size();
|
||||
report[1].clear();
|
||||
} else {
|
||||
store = new LinkedList<>();
|
||||
while(!report[1].isEmpty() && store.size() < 50) {
|
||||
store.add(report[1].removeFirst());
|
||||
count++;
|
||||
}
|
||||
store = new ArrayList<>(report[1].subList(0, 50));
|
||||
report[1].removeAll(store);
|
||||
count+=50;
|
||||
}
|
||||
} catch(NoSuchElementException ignored) {//concurrentmodification fuckery
|
||||
log.info("no suchelement bla");
|
||||
}
|
||||
db.storeTemp(store);
|
||||
db.storeTemp(store, false);
|
||||
}
|
||||
log.info(count + " videos added from " + threadname);
|
||||
profiler.endSection();
|
||||
|
@ -326,6 +324,22 @@ public class Crawler implements Runnable {
|
|||
}
|
||||
profiler.endSection();//main
|
||||
}
|
||||
profiler.startSection("waitforthreads");
|
||||
for(CrawlerThread ct : threads) {
|
||||
try {
|
||||
ct.thread.join();
|
||||
} catch (InterruptedException ignore) {}
|
||||
}
|
||||
log.info("All Threads Terminated.");
|
||||
|
||||
profiler.endStartSection("insertback");
|
||||
ArrayList<String> putback = new ArrayList<>(threadcount * threads.get(0).undoneSize());//create list with approximated size
|
||||
for(CrawlerThread ct : threads) {
|
||||
putback.addAll(ct.undone());
|
||||
}
|
||||
db.storeTemp(putback, true);
|
||||
profiler.endSection();//insertback
|
||||
|
||||
profiler.endSection();//root
|
||||
log.info("Profiler:");
|
||||
for(String s : profiler.getTreeView()) {
|
||||
|
@ -416,3 +430,4 @@ public class Crawler implements Runnable {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package de.mrbesen.youtubecrawler;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
@ -31,6 +32,14 @@ public class CrawlerThread implements Runnable {
|
|||
thread = t;
|
||||
}
|
||||
|
||||
LinkedList<String> undone() {
|
||||
return todo;
|
||||
}
|
||||
|
||||
int undoneSize() {
|
||||
return todo.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
while(parent.isCrawling()) {
|
||||
|
|
|
@ -26,9 +26,13 @@ public class DB implements Runnable {
|
|||
private Server serv = new Server(this);
|
||||
private Thread randomrefill = null;
|
||||
private int dbsize = 0;
|
||||
|
||||
private StringBuilder tostorebuffer ;
|
||||
private int writebuffersize = 500;
|
||||
private int writebuffercurrentsize = 0;
|
||||
|
||||
private StringBuilder totempbuffer;
|
||||
private int writetempbuffercurrentsize = 0;
|
||||
|
||||
public DB() {
|
||||
try {
|
||||
|
@ -69,6 +73,7 @@ public class DB implements Runnable {
|
|||
log.warn("could not read the number \"" + Config.prop.getProperty("db.writebuffersize") + "\" from the config file. db.writebuffersize");
|
||||
}
|
||||
tostorebuffer = new StringBuilder(writebuffersize);
|
||||
totempbuffer = new StringBuilder(writebuffersize);
|
||||
} catch (SQLException e) {
|
||||
log.error("Error while connecting to the database! ", e);
|
||||
}
|
||||
|
@ -143,15 +148,16 @@ public class DB implements Runnable {
|
|||
}
|
||||
if(writebuffercurrentsize > writebuffersize || force) {
|
||||
if(tostorebuffer.length() > 10) {
|
||||
log.info("Write data to DB video count: " + writebuffercurrentsize);
|
||||
log.info("Write databuffer to DB video count: " + writebuffercurrentsize);
|
||||
dbsize += writebuffercurrentsize;
|
||||
tostorebuffer.deleteCharAt(0);//delete leading ','
|
||||
String qu = "INSERT IGNORE INTO `ytcrawler`.`videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + tostorebuffer.toString();
|
||||
update(qu);
|
||||
|
||||
//reset buffer
|
||||
writebuffercurrentsize = 0;
|
||||
tostorebuffer = new StringBuilder(writebuffersize);
|
||||
}
|
||||
//reset buffer
|
||||
writebuffercurrentsize = 0;
|
||||
tostorebuffer = new StringBuilder(writebuffersize);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -271,15 +277,25 @@ public class DB implements Runnable {
|
|||
log.info("Delete Double done in " + ((System.currentTimeMillis() - start)/60000) + " min");
|
||||
}
|
||||
|
||||
public void storeTemp(LinkedList<String> strings) {
|
||||
public void storeTemp(ArrayList<String> strings, boolean force) {
|
||||
if(strings == null)
|
||||
return;
|
||||
if(!strings.isEmpty()) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
log.info("store Temp to buffer: " + strings.size());
|
||||
writetempbuffercurrentsize += strings.size();
|
||||
for(String s : strings) {
|
||||
sb.append("'), ('").append(s);
|
||||
totempbuffer.append(", ('").append(s).append("')");
|
||||
}
|
||||
update("INSERT IGNORE INTO `ytcrawler`.`temp` (`ytid`) VALUES ('" + sb.substring(6).toString() + "');");
|
||||
}
|
||||
if(writetempbuffercurrentsize > writebuffersize || force) {
|
||||
log.info("Write Buffer: " + writetempbuffercurrentsize);
|
||||
totempbuffer.deleteCharAt(0);//delete leading ','
|
||||
String qu = "INSERT IGNORE INTO `ytcrawler`.`temp` (`ytid`) VALUES " + totempbuffer.toString() + ";";
|
||||
update(qu);
|
||||
|
||||
//reset
|
||||
writetempbuffercurrentsize = 0;
|
||||
totempbuffer = new StringBuilder(writebuffersize);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue