performance improved

This commit is contained in:
MrBesen 2018-10-12 17:22:36 +02:00
parent 2080602278
commit 4471c0f01d
6 changed files with 114 additions and 86 deletions

View File

@ -19,6 +19,8 @@ import org.apache.log4j.Logger;
public class Crawler implements Runnable {
private int jobspeerthread = 100; // number of jobs a thread gets per request
int requestlimit = 5; // a crawler thread requests new videos once fewer than this many are left in its todo queue; read from config key "crawler.requestlimit"
private int idlecount = 5; // maximum number of idle-loop iterations allowed per pass of the main loop; read from config key "crawler.idlecount"
private LinkedList<String> toSave = new LinkedList<>(); // all found video ids (ytids) which still need to be analysed
private LinkedList<String> toCrawl = new LinkedList<>(); // all videos still to crawl
@ -31,7 +33,7 @@ public class Crawler implements Runnable {
private boolean crawl = true;
private int crawlcount = 0;
private int updateOffset = 0;
//private int updateOffset = 0;
private DB db = new DB();
private YoutubeAPI api = new YoutubeAPI();
@ -48,6 +50,16 @@ public class Crawler implements Runnable {
log.warn("could not read the number \"" + Config.prop.getProperty("crawler.jobspeerthread") + "\" from the config file. maxvideo");
jobspeerthread = 100;
}
try {
requestlimit = Integer.parseInt(Config.prop.getProperty("crawler.requestlimit"));
} catch(NumberFormatException e) {
log.warn("could not read the number \"" + Config.prop.getProperty("crawler.requestlimit") + "\" from the config file. crawler.requestlimit");
}
try {
idlecount = Integer.parseInt(Config.prop.getProperty("crawler.idlecount"));
} catch(NumberFormatException e) {
log.warn("could not read the number \"" + Config.prop.getProperty("crawler.idlecount") + "\" from the config file. crawler.idlecount");
}
}
public void stop() {
@ -122,7 +134,7 @@ public class Crawler implements Runnable {
e.printStackTrace();
}
}
profiler.endStartSection("populateThreads");
profiler.endStartSection("populateThreads");//loading crawlfile closed
//populate threads
int threadcount = 4;
try {
@ -138,13 +150,14 @@ public class Crawler implements Runnable {
threads.add(thr);
thr.thread.start();
}
profiler.endStartSection("deleteDouble");
profiler.endStartSection("deleteDouble");//populate threads
long lastdoubledelete = System.currentTimeMillis();
//db.deleteDouble();
profiler.endSection();//deletedouble
profiler.endSection();//startup
profiler.endStartSection("main");
boolean savedall = false;//ein 2. durch lauf, um wirklich alles zu speichern
while(crawl || savedall) {
profiler.startSection("main");
log.info("to Crawl: " + toCrawl.size() + /*" known: " + toknown.size() +*/ " Time: " + dateform.format(new Date()));
if(!crawl)
savedall = true;
@ -157,21 +170,25 @@ public class Crawler implements Runnable {
}
//kindof idle
profiler.endStartSection("idle");
while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) {
startup = 0;//stop startup count
if((System.currentTimeMillis() - lastdoubledelete) / 1000 > 1800) {
db.deleteDouble();
lastdoubledelete = System.currentTimeMillis();
} else {
Thread.yield();
try {
Thread.sleep(100);
} catch(InterruptedException ignored) {
break;
{
int count = 0;//donst stay to long in idle!
profiler.endStartSection("idle");
while(toCrawl.size() > (jobspeerthread * threads.size() * 2) && crawl && requested.isEmpty() && count < idlecount) {
count ++;
startup = 0;//stop startup count
if((System.currentTimeMillis() - lastdoubledelete) / 1000 > 1800) {
//db.deleteDouble();
lastdoubledelete = System.currentTimeMillis();
} else {
Thread.yield();
try {
Thread.sleep(100);
} catch(InterruptedException ignored) {
break;
}
}
// updateDB();
}
// updateDB();
}
//nothing left?
if(/*toknown.isEmpty() && */toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon
@ -282,13 +299,13 @@ public class Crawler implements Runnable {
}
if(videoids.size() > 0) {
profiler.startSection("getinfo");
List<Video> videos = api.getInfos(videoids)[0];
ArrayList<Video> videos = (ArrayList<Video>) api.getInfos(videoids)[0];
profiler.endStartSection("sendtoDB");
db.addVideos(videos);
db.addVideos(videos, false);
profiler.endSection();
}
}
profiler.endSection();
profiler.endSection();//save2DB
//at the beginning there is maybe just one video to crawl, so keep it calm.
if(startup > 0) {
@ -299,7 +316,7 @@ public class Crawler implements Runnable {
Thread.sleep(2000);
} catch(InterruptedException e) {}
finally {
profiler.endSection();
profiler.endSection();//startupsleep
}
}
} catch(Throwable t) {
@ -312,8 +329,9 @@ public class Crawler implements Runnable {
crawl = false;
Main.getMain().stop();
}
profiler.endSection();//main
}
profiler.endStartSection("cleanup");
profiler.startSection("cleanup");
profiler.startSection("deleteDouble");
//db.deleteDouble();
profiler.endStartSection("stopDB");
@ -321,9 +339,6 @@ public class Crawler implements Runnable {
profiler.endSection();
profiler.endSection();//root
log.info("Profiler:");
//for (Result res : profiler.getProfilingData("root")) {
// log.info(res.profilerName + " " + res.usePercentage + "% total: " + res.usePercentage + "%");
//}
for(String s : profiler.getTreeView()) {
log.info(s);
}
@ -378,6 +393,7 @@ public class Crawler implements Runnable {
/**
* Updates old entrys of the DB. currently unused.
*/
/*
private void updateDB() {
log.info("updating DB Offset= " + updateOffset);
LinkedList<String> vids = db.getUncompleted(50, updateOffset);
@ -396,6 +412,7 @@ public class Crawler implements Runnable {
log.info("Updated " + infos[0].size() + " Videos.");
}
}
*/
public static class Video {
String id = "";

View File

@ -43,7 +43,7 @@ public class CrawlerThread implements Runnable {
}
}
crawl(todo.removeFirst());
if(todo.size() < 5 && !requested) {
if(todo.size() < parent.requestlimit && !requested) {
requested = true;
parent.request(this);
}

View File

@ -26,6 +26,8 @@ public class DB implements Runnable {
private Server serv = new Server(this);
private Thread randomrefill = null;
private int dbsize = 0;
private ArrayList<Video> tostorebuffer;
private int writebuffersize = 500;
public DB() {
try {
@ -52,20 +54,28 @@ public class DB implements Runnable {
log.info("Database is set up!");
}
serv.start();
refillbuffer();
//get db size
dbsize();
//config data
try {
writebuffersize = Integer.parseInt(Config.prop.getProperty("db.writebuffersize"));
} catch(NumberFormatException e) {
log.warn("could not read the number \"" + Config.prop.getProperty("db.writebuffersize") + "\" from the config file. db.writebuffersize");
}
tostorebuffer = new ArrayList<>(writebuffersize);
} catch (SQLException e) {
log.error("Error while connecting to the database! ", e);
}
}
private void dbsize() {
try {
ResultSet set = query("SELECT count(*) as count FROM `videos`;");
ResultSet set = query("SELECT count(*) as count FROM `videos`;");
if(set != null) {
if(set.next()) {
dbsize = set.getInt(1);
@ -75,12 +85,11 @@ public class DB implements Runnable {
e.printStackTrace();
}
}
/**
 * Returns the locally tracked video count without querying the database.
 *
 * @return the current value of the {@code dbsize} field
 */
public int getDBSize() {
return dbsize;
}
private void connect(boolean selectdb) {
try {
Class.forName("com.mysql.jdbc.Driver");//Treiber laden try this driver: com.mysql.cj.jdbc.Driver
@ -119,18 +128,24 @@ public class DB implements Runnable {
* save the list of videos to the DB
* @param input
*/
public void addVideos(List<Video> input) {
public void addVideos(List<Video> input, boolean force) {
//log.info("add " + input.size() + " videos");
if(input.size() > 0) {
dbsize += input.size();
if(input != null) {
if(input.size() > 0) {
tostorebuffer.addAll(input);
}
}
if(tostorebuffer.size() > writebuffersize || force) {
dbsize += tostorebuffer.size();
StringBuilder sb = new StringBuilder();
for(int i = 0; i< input.size(); i++) {
for(int i = 0; i < tostorebuffer.size(); i++) {
if(i > 0)
sb.append(',');
Video v = input.get(i);
Video v = tostorebuffer.get(i);
if(v != null)
sb.append("('").append(v.id).append("','").append(v.length).append("','").append(v.created).append("','").append(v.languageCode).append("','").append(v.categorie).append("','").append(v.title).append("','").append(v.channel).append("','").append(v.tags).append("') ");
}
tostorebuffer.clear();
if(sb.length() > 2) {
String qu = "INSERT IGNORE INTO `ytcrawler`.`videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + sb.toString();
update(qu);
@ -217,7 +232,7 @@ public class DB implements Runnable {
randomrefill.start();
}
}
public String getRandom() {
log.info("Get random Video");
if(randombuffer.size() < 10 ) {
@ -233,7 +248,7 @@ public class DB implements Runnable {
/**
 * Returns how many entries are currently held in {@code randombuffer}.
 *
 * @return the size of {@code randombuffer}
 */
public int getRandomCount() {
return randombuffer.size();
}
public LinkedList<String> restoreTemp() {
ResultSet res = query("SELECT * FROM `ytcrawler`.`temp` LIMIT 500;");
LinkedList<String> out = new LinkedList<>();
@ -266,7 +281,7 @@ public class DB implements Runnable {
}
}
/**
* Stops the randomnes-Server and disconnect
*/
@ -275,6 +290,7 @@ public class DB implements Runnable {
try {
if(con != null) {
if(!con.isClosed()) {
addVideos(null, true);
con.close();
}
}

View File

@ -12,14 +12,14 @@ import org.json.JSONObject;
import de.mrbesen.telegram.MessageBuilder;
import de.mrbesen.telegram.TelegramAPI;
import de.mrbesen.telegram.commands.CommandHandler;
import de.mrbesen.telegram.commands.JSONCommandHandler;
import de.mrbesen.telegram.event.EventHandler;
import de.mrbesen.telegram.event.EventListener;
import de.mrbesen.telegram.event.events.UserSendMessageEvent;
import de.mrbesen.telegram.log.Log4JLog;
import de.mrbesen.telegram.objects.TUser;
public class Main implements CommandHandler, EventListener{
public class Main implements JSONCommandHandler, EventListener{
private ArrayList<String> admins = new ArrayList<>();//usernames of admins
private String adminstr = null;
@ -174,6 +174,12 @@ public class Main implements CommandHandler, EventListener{
mainthread.interrupt();
}
/**
 * Command callback variant without the JSON argument. This class does not
 * act on it: the body performs no work and the result is constant.
 *
 * @param sender the user who issued the command (ignored)
 * @param cmd    the command name (ignored)
 * @param args   the command arguments (ignored)
 * @return always {@code false} (presumably meaning "not handled"; confirm
 *         against the handler interface contract)
 */
@Override
public boolean onCommand(TUser sender, String cmd, String[] args) {
//unused
return false;
}
@Override
public boolean onCommand(TUser sender, String cmd, String[] args, JSONObject j) {
if(cmd.startsWith("random")) {
@ -210,6 +216,8 @@ public class Main implements CommandHandler, EventListener{
for(String s : cra.getProfiling()) {
sb.append(s).append('\n');
}
if(sb.length() == 0)
sb.append("No Data.");
sender.sendMessage(sb.toString());
return true;
}

View File

@ -27,6 +27,7 @@ public class Profiler {
this.profilingMap.clear();
this.profilingSection = "";
this.sectionList.clear();
start = -1;
}
/**
@ -36,6 +37,7 @@ public class Profiler {
if (this.profilingEnabled) {
if(start == -1)
start = System.nanoTime();
if (this.profilingSection.length() > 0) {
this.profilingSection = this.profilingSection + ".";
}
@ -43,6 +45,7 @@ public class Profiler {
this.profilingSection = this.profilingSection + name;
this.sectionList.add(this.profilingSection);
this.timestampList.add(Long.valueOf(System.nanoTime()));
//System.out.println("started section: " + name + " currently in: " + profilingSection);
}
}
@ -51,20 +54,23 @@ public class Profiler {
*/
public void endSection() {
if (this.profilingEnabled) {
long start = System.nanoTime();
long stop = ((Long) this.timestampList.remove(this.timestampList.size() - 1)).longValue();
this.sectionList.remove(this.sectionList.size() - 1);
long delta = start - stop;
try {
long start = System.nanoTime();
long stop = ((Long) this.timestampList.remove(this.timestampList.size() - 1)).longValue();
this.sectionList.remove(this.sectionList.size() - 1);
long delta = start - stop;
if (this.profilingMap.containsKey(this.profilingSection)) {
this.profilingMap.put(this.profilingSection,
Long.valueOf(((Long) this.profilingMap.get(this.profilingSection)).longValue() + delta));
} else {
this.profilingMap.put(this.profilingSection, Long.valueOf(delta));
if (this.profilingMap.containsKey(this.profilingSection)) {
this.profilingMap.put(this.profilingSection,
Long.valueOf(((Long) this.profilingMap.get(this.profilingSection)).longValue() + delta));
} else {
this.profilingMap.put(this.profilingSection, Long.valueOf(delta));
}
this.profilingSection = this.sectionList.isEmpty() ? "" : (String) this.sectionList.get(this.sectionList.size() - 1);
} catch (Exception e ) {
e.printStackTrace();
}
this.profilingSection = this.sectionList.isEmpty() ? ""
: (String) this.sectionList.get(this.sectionList.size() - 1);
}
}
@ -78,10 +84,8 @@ public class Profiler {
}
public List<Profiler.Result> getProfilingData(String profilerName) {
if (!this.profilingEnabled) {
return Collections.<Profiler.Result>emptyList();
} else {
List<Profiler.Result> out = Lists.<Profiler.Result>newArrayList();
if (this.profilingEnabled) {
List<Profiler.Result> out = Lists.<Profiler.Result>newArrayList();
long totaltime = System.nanoTime() - start;
//calculate percentage of each child section
@ -93,44 +97,26 @@ public class Profiler {
out.add(new Profiler.Result(prfiler_name, totaltimep, subsectiontime));
}
}
for (String key : this.profilingMap.keySet()) {
this.profilingMap.put(key, Long.valueOf(((Long) this.profilingMap.get(key)).longValue() * 999L / 1000L));
}
return out;
} else {
return Collections.<Profiler.Result>emptyList();
}
}
public LinkedList<String> getTreeView() {
/*for(String key : profilingMap.keySet()) {
System.out.println(key);
}*/
//end all sections
try {
/*LinkedList<String> sections = new LinkedList<>();
while(!sectionList.isEmpty()) {
String current = sectionList.get(sectionList.size() -1);
if(current != null) {
sections.add(current.substring(current.lastIndexOf('.')+1));
endSection();
}
}
for(String section : sections) {
startSection(section);
}
*/
return getTreeView("root", "");
} catch(OutOfMemoryError e) {
e.printStackTrace();
}
return null;
return getTreeView("root", "");
}
private LinkedList<String> getTreeView(String name, String leading) {
LinkedList<String> out = new LinkedList<>();
if(new Exception().getStackTrace().length > 20)//prevent stack overflow debug only
if(new Exception().getStackTrace().length > 50)//prevent stack overflow debug only
return out;
for(Result res : getProfilingData(name)) {
out.add(leading + res.profilerName + " " + (res.time/10000000)/100D + "s " + form(res.totalUsePercentage) + "% ");
out.addAll(getTreeView(res.profilerName, leading + "\t"));

View File

@ -7,6 +7,7 @@ import java.net.URL;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;
@ -35,7 +36,7 @@ public class YoutubeAPI {
return (Video) getInfos(id)[0].get(0);
}
public LinkedList<Video>[] getInfos(List<String> ids) {
public List<Video>[] getInfos(List<String> ids) {
log.info("get " + ids.size() + " infos");
if(ids.isEmpty())
return null;
@ -47,8 +48,8 @@ public class YoutubeAPI {
return getInfos(sb.toString());
}
public LinkedList<Video>[] getInfos(String idlist) {
LinkedList<Video> out = new LinkedList<Video>();
public List<Video>[] getInfos(String idlist) {
ArrayList<Video> out = new ArrayList<Video>(idlist.length() / 12);//approximierte vorraussichtliche länge
LinkedList<Video> livestr = new LinkedList<Video>();
String nextpage = "";
do {
@ -138,7 +139,7 @@ public class YoutubeAPI {
}
} while(!nextpage.equals(""));
log.info("got " + (out.size() + livestr.size()) + " infos");
return new LinkedList[] {out, livestr};
return new List[] {out, livestr};
}
private String removeunwanted(String in) {