performance improved

This commit is contained in:
MrBesen 2018-10-12 17:22:36 +02:00
parent 2080602278
commit 4471c0f01d
6 changed files with 114 additions and 86 deletions

View File

@ -19,6 +19,8 @@ import org.apache.log4j.Logger;
public class Crawler implements Runnable { public class Crawler implements Runnable {
private int jobspeerthread = 100; //the amount of jobs a thread get peer request private int jobspeerthread = 100; //the amount of jobs a thread get peer request
// Low-water mark for a crawler thread's todo list: CrawlerThread reads this as parent.requestlimit and
// requests more work once its todo size drops below it. Default 5; overwritten from the
// "crawler.requestlimit" config property when that value parses as an integer.
int requestlimit = 5;//amount of videos to be left in the todo queue of a thread until it requests new videos
// Cap on how many iterations the "idle" wait loop may run before the main loop moves on.
// Default 5; overwritten from the "crawler.idlecount" config property when that value parses as an integer.
private int idlecount = 5;//amount of idle loops allowed
private LinkedList<String> toSave = new LinkedList<>();//all found ytids, witch need to be analysed private LinkedList<String> toSave = new LinkedList<>();//all found ytids, witch need to be analysed
private LinkedList<String> toCrawl = new LinkedList<>();//all videos tu crawl private LinkedList<String> toCrawl = new LinkedList<>();//all videos tu crawl
@ -31,7 +33,7 @@ public class Crawler implements Runnable {
private boolean crawl = true; private boolean crawl = true;
private int crawlcount = 0; private int crawlcount = 0;
private int updateOffset = 0; //private int updateOffset = 0;
private DB db = new DB(); private DB db = new DB();
private YoutubeAPI api = new YoutubeAPI(); private YoutubeAPI api = new YoutubeAPI();
@ -48,6 +50,16 @@ public class Crawler implements Runnable {
log.warn("could not read the number \"" + Config.prop.getProperty("crawler.jobspeerthread") + "\" from the config file. maxvideo"); log.warn("could not read the number \"" + Config.prop.getProperty("crawler.jobspeerthread") + "\" from the config file. maxvideo");
jobspeerthread = 100; jobspeerthread = 100;
} }
try {
requestlimit = Integer.parseInt(Config.prop.getProperty("crawler.requestlimit"));
} catch(NumberFormatException e) {
log.warn("could not read the number \"" + Config.prop.getProperty("crawler.requestlimit") + "\" from the config file. crawler.requestlimit");
}
try {
idlecount = Integer.parseInt(Config.prop.getProperty("crawler.idlecount"));
} catch(NumberFormatException e) {
log.warn("could not read the number \"" + Config.prop.getProperty("crawler.idlecount") + "\" from the config file. crawler.idlecount");
}
} }
public void stop() { public void stop() {
@ -122,7 +134,7 @@ public class Crawler implements Runnable {
e.printStackTrace(); e.printStackTrace();
} }
} }
profiler.endStartSection("populateThreads"); profiler.endStartSection("populateThreads");//loading crawlfile closed
//populate threads //populate threads
int threadcount = 4; int threadcount = 4;
try { try {
@ -138,13 +150,14 @@ public class Crawler implements Runnable {
threads.add(thr); threads.add(thr);
thr.thread.start(); thr.thread.start();
} }
profiler.endStartSection("deleteDouble"); profiler.endStartSection("deleteDouble");//populate threads
long lastdoubledelete = System.currentTimeMillis(); long lastdoubledelete = System.currentTimeMillis();
//db.deleteDouble(); //db.deleteDouble();
profiler.endSection();//deletedouble
profiler.endSection();//startup profiler.endSection();//startup
profiler.endStartSection("main");
boolean savedall = false;//ein 2. durch lauf, um wirklich alles zu speichern boolean savedall = false;//ein 2. durch lauf, um wirklich alles zu speichern
while(crawl || savedall) { while(crawl || savedall) {
profiler.startSection("main");
log.info("to Crawl: " + toCrawl.size() + /*" known: " + toknown.size() +*/ " Time: " + dateform.format(new Date())); log.info("to Crawl: " + toCrawl.size() + /*" known: " + toknown.size() +*/ " Time: " + dateform.format(new Date()));
if(!crawl) if(!crawl)
savedall = true; savedall = true;
@ -157,21 +170,25 @@ public class Crawler implements Runnable {
} }
//kindof idle //kindof idle
profiler.endStartSection("idle"); {
while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) { int count = 0;//donst stay to long in idle!
startup = 0;//stop startup count profiler.endStartSection("idle");
if((System.currentTimeMillis() - lastdoubledelete) / 1000 > 1800) { while(toCrawl.size() > (jobspeerthread * threads.size() * 2) && crawl && requested.isEmpty() && count < idlecount) {
db.deleteDouble(); count ++;
lastdoubledelete = System.currentTimeMillis(); startup = 0;//stop startup count
} else { if((System.currentTimeMillis() - lastdoubledelete) / 1000 > 1800) {
Thread.yield(); //db.deleteDouble();
try { lastdoubledelete = System.currentTimeMillis();
Thread.sleep(100); } else {
} catch(InterruptedException ignored) { Thread.yield();
break; try {
Thread.sleep(100);
} catch(InterruptedException ignored) {
break;
}
} }
// updateDB();
} }
// updateDB();
} }
//nothing left? //nothing left?
if(/*toknown.isEmpty() && */toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon if(/*toknown.isEmpty() && */toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon
@ -282,13 +299,13 @@ public class Crawler implements Runnable {
} }
if(videoids.size() > 0) { if(videoids.size() > 0) {
profiler.startSection("getinfo"); profiler.startSection("getinfo");
List<Video> videos = api.getInfos(videoids)[0]; ArrayList<Video> videos = (ArrayList<Video>) api.getInfos(videoids)[0];
profiler.endStartSection("sendtoDB"); profiler.endStartSection("sendtoDB");
db.addVideos(videos); db.addVideos(videos, false);
profiler.endSection(); profiler.endSection();
} }
} }
profiler.endSection(); profiler.endSection();//save2DB
//at the beginning there is maybe just one video to crawl, so keep it calm. //at the beginning there is maybe just one video to crawl, so keep it calm.
if(startup > 0) { if(startup > 0) {
@ -299,7 +316,7 @@ public class Crawler implements Runnable {
Thread.sleep(2000); Thread.sleep(2000);
} catch(InterruptedException e) {} } catch(InterruptedException e) {}
finally { finally {
profiler.endSection(); profiler.endSection();//startupsleep
} }
} }
} catch(Throwable t) { } catch(Throwable t) {
@ -312,8 +329,9 @@ public class Crawler implements Runnable {
crawl = false; crawl = false;
Main.getMain().stop(); Main.getMain().stop();
} }
profiler.endSection();//main
} }
profiler.endStartSection("cleanup"); profiler.startSection("cleanup");
profiler.startSection("deleteDouble"); profiler.startSection("deleteDouble");
//db.deleteDouble(); //db.deleteDouble();
profiler.endStartSection("stopDB"); profiler.endStartSection("stopDB");
@ -321,9 +339,6 @@ public class Crawler implements Runnable {
profiler.endSection(); profiler.endSection();
profiler.endSection();//root profiler.endSection();//root
log.info("Profiler:"); log.info("Profiler:");
//for (Result res : profiler.getProfilingData("root")) {
// log.info(res.profilerName + " " + res.usePercentage + "% total: " + res.usePercentage + "%");
//}
for(String s : profiler.getTreeView()) { for(String s : profiler.getTreeView()) {
log.info(s); log.info(s);
} }
@ -378,6 +393,7 @@ public class Crawler implements Runnable {
/** /**
* Updates old entrys of the DB. currently unused. * Updates old entrys of the DB. currently unused.
*/ */
/*
private void updateDB() { private void updateDB() {
log.info("updating DB Offset= " + updateOffset); log.info("updating DB Offset= " + updateOffset);
LinkedList<String> vids = db.getUncompleted(50, updateOffset); LinkedList<String> vids = db.getUncompleted(50, updateOffset);
@ -396,6 +412,7 @@ public class Crawler implements Runnable {
log.info("Updated " + infos[0].size() + " Videos."); log.info("Updated " + infos[0].size() + " Videos.");
} }
} }
*/
public static class Video { public static class Video {
String id = ""; String id = "";

View File

@ -43,7 +43,7 @@ public class CrawlerThread implements Runnable {
} }
} }
crawl(todo.removeFirst()); crawl(todo.removeFirst());
if(todo.size() < 5 && !requested) { if(todo.size() < parent.requestlimit && !requested) {
requested = true; requested = true;
parent.request(this); parent.request(this);
} }

View File

@ -26,6 +26,8 @@ public class DB implements Runnable {
private Server serv = new Server(this); private Server serv = new Server(this);
private Thread randomrefill = null; private Thread randomrefill = null;
private int dbsize = 0; private int dbsize = 0;
// Videos waiting to be written: addVideos appends its input here and only builds the INSERT
// statement once the buffer holds more than writebuffersize entries or a flush is forced.
private ArrayList<Video> tostorebuffer;
// Flush threshold for tostorebuffer, also used as its initial capacity. Default 500; overwritten from
// the "db.writebuffersize" config property when that value parses as an integer.
private int writebuffersize = 500;
public DB() { public DB() {
try { try {
@ -52,20 +54,28 @@ public class DB implements Runnable {
log.info("Database is set up!"); log.info("Database is set up!");
} }
serv.start(); serv.start();
refillbuffer(); refillbuffer();
//get db size //get db size
dbsize(); dbsize();
//config data
try {
writebuffersize = Integer.parseInt(Config.prop.getProperty("db.writebuffersize"));
} catch(NumberFormatException e) {
log.warn("could not read the number \"" + Config.prop.getProperty("db.writebuffersize") + "\" from the config file. db.writebuffersize");
}
tostorebuffer = new ArrayList<>(writebuffersize);
} catch (SQLException e) { } catch (SQLException e) {
log.error("Error while connecting to the database! ", e); log.error("Error while connecting to the database! ", e);
} }
} }
private void dbsize() { private void dbsize() {
try { try {
ResultSet set = query("SELECT count(*) as count FROM `videos`;"); ResultSet set = query("SELECT count(*) as count FROM `videos`;");
if(set != null) { if(set != null) {
if(set.next()) { if(set.next()) {
dbsize = set.getInt(1); dbsize = set.getInt(1);
@ -75,12 +85,11 @@ public class DB implements Runnable {
e.printStackTrace(); e.printStackTrace();
} }
} }
public int getDBSize() { public int getDBSize() {
return dbsize; return dbsize;
} }
private void connect(boolean selectdb) { private void connect(boolean selectdb) {
try { try {
Class.forName("com.mysql.jdbc.Driver");//Treiber laden try this driver: com.mysql.cj.jdbc.Driver Class.forName("com.mysql.jdbc.Driver");//Treiber laden try this driver: com.mysql.cj.jdbc.Driver
@ -119,18 +128,24 @@ public class DB implements Runnable {
* save the list of videos to the DB * save the list of videos to the DB
* @param input * @param input
*/ */
public void addVideos(List<Video> input) { public void addVideos(List<Video> input, boolean force) {
//log.info("add " + input.size() + " videos"); //log.info("add " + input.size() + " videos");
if(input.size() > 0) { if(input != null) {
dbsize += input.size(); if(input.size() > 0) {
tostorebuffer.addAll(input);
}
}
if(tostorebuffer.size() > writebuffersize || force) {
dbsize += tostorebuffer.size();
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for(int i = 0; i< input.size(); i++) { for(int i = 0; i < tostorebuffer.size(); i++) {
if(i > 0) if(i > 0)
sb.append(','); sb.append(',');
Video v = input.get(i); Video v = tostorebuffer.get(i);
if(v != null) if(v != null)
sb.append("('").append(v.id).append("','").append(v.length).append("','").append(v.created).append("','").append(v.languageCode).append("','").append(v.categorie).append("','").append(v.title).append("','").append(v.channel).append("','").append(v.tags).append("') "); sb.append("('").append(v.id).append("','").append(v.length).append("','").append(v.created).append("','").append(v.languageCode).append("','").append(v.categorie).append("','").append(v.title).append("','").append(v.channel).append("','").append(v.tags).append("') ");
} }
tostorebuffer.clear();
if(sb.length() > 2) { if(sb.length() > 2) {
String qu = "INSERT IGNORE INTO `ytcrawler`.`videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + sb.toString(); String qu = "INSERT IGNORE INTO `ytcrawler`.`videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + sb.toString();
update(qu); update(qu);
@ -217,7 +232,7 @@ public class DB implements Runnable {
randomrefill.start(); randomrefill.start();
} }
} }
public String getRandom() { public String getRandom() {
log.info("Get random Video"); log.info("Get random Video");
if(randombuffer.size() < 10 ) { if(randombuffer.size() < 10 ) {
@ -233,7 +248,7 @@ public class DB implements Runnable {
public int getRandomCount() { public int getRandomCount() {
return randombuffer.size(); return randombuffer.size();
} }
public LinkedList<String> restoreTemp() { public LinkedList<String> restoreTemp() {
ResultSet res = query("SELECT * FROM `ytcrawler`.`temp` LIMIT 500;"); ResultSet res = query("SELECT * FROM `ytcrawler`.`temp` LIMIT 500;");
LinkedList<String> out = new LinkedList<>(); LinkedList<String> out = new LinkedList<>();
@ -266,7 +281,7 @@ public class DB implements Runnable {
} }
} }
/** /**
* Stops the randomnes-Server and disconnect * Stops the randomnes-Server and disconnect
*/ */
@ -275,6 +290,7 @@ public class DB implements Runnable {
try { try {
if(con != null) { if(con != null) {
if(!con.isClosed()) { if(!con.isClosed()) {
addVideos(null, true);
con.close(); con.close();
} }
} }

View File

@ -12,14 +12,14 @@ import org.json.JSONObject;
import de.mrbesen.telegram.MessageBuilder; import de.mrbesen.telegram.MessageBuilder;
import de.mrbesen.telegram.TelegramAPI; import de.mrbesen.telegram.TelegramAPI;
import de.mrbesen.telegram.commands.CommandHandler; import de.mrbesen.telegram.commands.JSONCommandHandler;
import de.mrbesen.telegram.event.EventHandler; import de.mrbesen.telegram.event.EventHandler;
import de.mrbesen.telegram.event.EventListener; import de.mrbesen.telegram.event.EventListener;
import de.mrbesen.telegram.event.events.UserSendMessageEvent; import de.mrbesen.telegram.event.events.UserSendMessageEvent;
import de.mrbesen.telegram.log.Log4JLog; import de.mrbesen.telegram.log.Log4JLog;
import de.mrbesen.telegram.objects.TUser; import de.mrbesen.telegram.objects.TUser;
public class Main implements CommandHandler, EventListener{ public class Main implements JSONCommandHandler, EventListener{
private ArrayList<String> admins = new ArrayList<>();//usernames of admins private ArrayList<String> admins = new ArrayList<>();//usernames of admins
private String adminstr = null; private String adminstr = null;
@ -174,6 +174,12 @@ public class Main implements CommandHandler, EventListener{
mainthread.interrupt(); mainthread.interrupt();
} }
/**
 * Command callback variant without the JSON payload. This class does not use it;
 * command handling lives in the overload that additionally takes a {@code JSONObject}.
 *
 * @param sender user who issued the command (ignored)
 * @param cmd    command name (ignored)
 * @param args   command arguments (ignored)
 * @return always {@code false} (presumably "not handled" - confirm against the handler interface)
 */
@Override
public boolean onCommand(TUser sender, String cmd, String[] args) {
//unused
return false;
}
@Override @Override
public boolean onCommand(TUser sender, String cmd, String[] args, JSONObject j) { public boolean onCommand(TUser sender, String cmd, String[] args, JSONObject j) {
if(cmd.startsWith("random")) { if(cmd.startsWith("random")) {
@ -210,6 +216,8 @@ public class Main implements CommandHandler, EventListener{
for(String s : cra.getProfiling()) { for(String s : cra.getProfiling()) {
sb.append(s).append('\n'); sb.append(s).append('\n');
} }
if(sb.length() == 0)
sb.append("No Data.");
sender.sendMessage(sb.toString()); sender.sendMessage(sb.toString());
return true; return true;
} }

View File

@ -27,6 +27,7 @@ public class Profiler {
this.profilingMap.clear(); this.profilingMap.clear();
this.profilingSection = ""; this.profilingSection = "";
this.sectionList.clear(); this.sectionList.clear();
start = -1;
} }
/** /**
@ -36,6 +37,7 @@ public class Profiler {
if (this.profilingEnabled) { if (this.profilingEnabled) {
if(start == -1) if(start == -1)
start = System.nanoTime(); start = System.nanoTime();
if (this.profilingSection.length() > 0) { if (this.profilingSection.length() > 0) {
this.profilingSection = this.profilingSection + "."; this.profilingSection = this.profilingSection + ".";
} }
@ -43,6 +45,7 @@ public class Profiler {
this.profilingSection = this.profilingSection + name; this.profilingSection = this.profilingSection + name;
this.sectionList.add(this.profilingSection); this.sectionList.add(this.profilingSection);
this.timestampList.add(Long.valueOf(System.nanoTime())); this.timestampList.add(Long.valueOf(System.nanoTime()));
//System.out.println("started section: " + name + " currently in: " + profilingSection);
} }
} }
@ -51,20 +54,23 @@ public class Profiler {
*/ */
public void endSection() { public void endSection() {
if (this.profilingEnabled) { if (this.profilingEnabled) {
long start = System.nanoTime(); try {
long stop = ((Long) this.timestampList.remove(this.timestampList.size() - 1)).longValue(); long start = System.nanoTime();
this.sectionList.remove(this.sectionList.size() - 1); long stop = ((Long) this.timestampList.remove(this.timestampList.size() - 1)).longValue();
long delta = start - stop; this.sectionList.remove(this.sectionList.size() - 1);
long delta = start - stop;
if (this.profilingMap.containsKey(this.profilingSection)) { if (this.profilingMap.containsKey(this.profilingSection)) {
this.profilingMap.put(this.profilingSection, this.profilingMap.put(this.profilingSection,
Long.valueOf(((Long) this.profilingMap.get(this.profilingSection)).longValue() + delta)); Long.valueOf(((Long) this.profilingMap.get(this.profilingSection)).longValue() + delta));
} else { } else {
this.profilingMap.put(this.profilingSection, Long.valueOf(delta)); this.profilingMap.put(this.profilingSection, Long.valueOf(delta));
}
this.profilingSection = this.sectionList.isEmpty() ? "" : (String) this.sectionList.get(this.sectionList.size() - 1);
} catch (Exception e ) {
e.printStackTrace();
} }
this.profilingSection = this.sectionList.isEmpty() ? ""
: (String) this.sectionList.get(this.sectionList.size() - 1);
} }
} }
@ -78,10 +84,8 @@ public class Profiler {
} }
public List<Profiler.Result> getProfilingData(String profilerName) { public List<Profiler.Result> getProfilingData(String profilerName) {
if (!this.profilingEnabled) { if (this.profilingEnabled) {
return Collections.<Profiler.Result>emptyList(); List<Profiler.Result> out = Lists.<Profiler.Result>newArrayList();
} else {
List<Profiler.Result> out = Lists.<Profiler.Result>newArrayList();
long totaltime = System.nanoTime() - start; long totaltime = System.nanoTime() - start;
//calculate percentage of each child section //calculate percentage of each child section
@ -93,44 +97,26 @@ public class Profiler {
out.add(new Profiler.Result(prfiler_name, totaltimep, subsectiontime)); out.add(new Profiler.Result(prfiler_name, totaltimep, subsectiontime));
} }
} }
for (String key : this.profilingMap.keySet()) { for (String key : this.profilingMap.keySet()) {
this.profilingMap.put(key, Long.valueOf(((Long) this.profilingMap.get(key)).longValue() * 999L / 1000L)); this.profilingMap.put(key, Long.valueOf(((Long) this.profilingMap.get(key)).longValue() * 999L / 1000L));
} }
return out; return out;
} else {
return Collections.<Profiler.Result>emptyList();
} }
} }
public LinkedList<String> getTreeView() { public LinkedList<String> getTreeView() {
/*for(String key : profilingMap.keySet()) { return getTreeView("root", "");
System.out.println(key);
}*/
//end all sections
try {
/*LinkedList<String> sections = new LinkedList<>();
while(!sectionList.isEmpty()) {
String current = sectionList.get(sectionList.size() -1);
if(current != null) {
sections.add(current.substring(current.lastIndexOf('.')+1));
endSection();
}
}
for(String section : sections) {
startSection(section);
}
*/
return getTreeView("root", "");
} catch(OutOfMemoryError e) {
e.printStackTrace();
}
return null;
} }
private LinkedList<String> getTreeView(String name, String leading) { private LinkedList<String> getTreeView(String name, String leading) {
LinkedList<String> out = new LinkedList<>(); LinkedList<String> out = new LinkedList<>();
if(new Exception().getStackTrace().length > 20)//prevent stack overflow debug only if(new Exception().getStackTrace().length > 50)//prevent stack overflow debug only
return out; return out;
for(Result res : getProfilingData(name)) { for(Result res : getProfilingData(name)) {
out.add(leading + res.profilerName + " " + (res.time/10000000)/100D + "s " + form(res.totalUsePercentage) + "% "); out.add(leading + res.profilerName + " " + (res.time/10000000)/100D + "s " + form(res.totalUsePercentage) + "% ");
out.addAll(getTreeView(res.profilerName, leading + "\t")); out.addAll(getTreeView(res.profilerName, leading + "\t"));

View File

@ -7,6 +7,7 @@ import java.net.URL;
import java.text.DateFormat; import java.text.DateFormat;
import java.text.ParseException; import java.text.ParseException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
@ -35,7 +36,7 @@ public class YoutubeAPI {
return (Video) getInfos(id)[0].get(0); return (Video) getInfos(id)[0].get(0);
} }
public LinkedList<Video>[] getInfos(List<String> ids) { public List<Video>[] getInfos(List<String> ids) {
log.info("get " + ids.size() + " infos"); log.info("get " + ids.size() + " infos");
if(ids.isEmpty()) if(ids.isEmpty())
return null; return null;
@ -47,8 +48,8 @@ public class YoutubeAPI {
return getInfos(sb.toString()); return getInfos(sb.toString());
} }
public LinkedList<Video>[] getInfos(String idlist) { public List<Video>[] getInfos(String idlist) {
LinkedList<Video> out = new LinkedList<Video>(); ArrayList<Video> out = new ArrayList<Video>(idlist.length() / 12);//approximierte vorraussichtliche länge
LinkedList<Video> livestr = new LinkedList<Video>(); LinkedList<Video> livestr = new LinkedList<Video>();
String nextpage = ""; String nextpage = "";
do { do {
@ -138,7 +139,7 @@ public class YoutubeAPI {
} }
} while(!nextpage.equals("")); } while(!nextpage.equals(""));
log.info("got " + (out.size() + livestr.size()) + " infos"); log.info("got " + (out.size() + livestr.size()) + " infos");
return new LinkedList[] {out, livestr}; return new List[] {out, livestr};
} }
private String removeunwanted(String in) { private String removeunwanted(String in) {