fehler behoben, profiler, delete double entfernt
This commit is contained in:
parent
33bdc8cf5c
commit
2080602278
|
@ -22,12 +22,11 @@ public class Crawler implements Runnable {
|
||||||
|
|
||||||
private LinkedList<String> toSave = new LinkedList<>();//all found ytids, witch need to be analysed
|
private LinkedList<String> toSave = new LinkedList<>();//all found ytids, witch need to be analysed
|
||||||
private LinkedList<String> toCrawl = new LinkedList<>();//all videos tu crawl
|
private LinkedList<String> toCrawl = new LinkedList<>();//all videos tu crawl
|
||||||
private LinkedList<String> toknown = new LinkedList<>();//list with all videos, to test if they are allready known, if not they are moved to tocrawle
|
//private LinkedList<String> toknown = new LinkedList<>();//list with all videos, to test if they are allready known, if not they are moved to tocrawle
|
||||||
private List<CrawlerThread> threads;//list of all threads
|
private List<CrawlerThread> threads;//list of all threads
|
||||||
private List<CrawlerThread> requested = new LinkedList<>();
|
private List<CrawlerThread> requested = new LinkedList<>();
|
||||||
|
|
||||||
private static DateFormat dateform = new SimpleDateFormat("dd-MM-yyyy HH:mm:ss");
|
private static DateFormat dateform = new SimpleDateFormat("dd-MM-yyyy HH:mm:ss");
|
||||||
private String currentstate = "undefined";
|
|
||||||
private long start;
|
private long start;
|
||||||
|
|
||||||
private boolean crawl = true;
|
private boolean crawl = true;
|
||||||
|
@ -38,17 +37,11 @@ public class Crawler implements Runnable {
|
||||||
private YoutubeAPI api = new YoutubeAPI();
|
private YoutubeAPI api = new YoutubeAPI();
|
||||||
private File crawlfile = new File("crawl.txt");
|
private File crawlfile = new File("crawl.txt");
|
||||||
private Logger log = Logger.getLogger(this.getClass().getName());
|
private Logger log = Logger.getLogger(this.getClass().getName());
|
||||||
|
private Profiler profiler = new Profiler();
|
||||||
|
|
||||||
private int maxvideostotest = 100;
|
private int startup = 2;//to keep the beginning cool - counter how often the program is allowed to enter startup sleep
|
||||||
private int startup = 2;//to keep the beginning cool
|
|
||||||
|
|
||||||
public Crawler() {
|
public Crawler() {
|
||||||
try {
|
|
||||||
maxvideostotest = Integer.parseInt(Config.prop.getProperty("crawler.maxvideos"));
|
|
||||||
} catch(NumberFormatException e) {
|
|
||||||
log.warn("could not read the number \"" + Config.prop.getProperty("crawler.maxvideos") + "\" from the config file. maxvideo");
|
|
||||||
maxvideostotest = 100;
|
|
||||||
}
|
|
||||||
try {
|
try {
|
||||||
jobspeerthread = Integer.parseInt(Config.prop.getProperty("crawler.jobspeerthread"));
|
jobspeerthread = Integer.parseInt(Config.prop.getProperty("crawler.jobspeerthread"));
|
||||||
} catch(NumberFormatException e) {
|
} catch(NumberFormatException e) {
|
||||||
|
@ -62,8 +55,13 @@ public class Crawler implements Runnable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized void addtoCrawl(String videoid) {
|
public synchronized void addtoCrawl(String videoid) {
|
||||||
if(! (toCrawl.contains(videoid) || toknown.contains(videoid)))
|
//if(! (toCrawl.contains(videoid) || toknown.contains(videoid)))
|
||||||
toknown.add(videoid);
|
//toknown.add(videoid);
|
||||||
|
if(toCrawl.contains(videoid)) {
|
||||||
|
LinkedList<String> str = new LinkedList<String>();
|
||||||
|
str.add(videoid);
|
||||||
|
db.storeTemp(str);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isCrawling() {
|
public boolean isCrawling() {
|
||||||
|
@ -89,13 +87,17 @@ public class Crawler implements Runnable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
currentstate = "loading crawlfile";
|
profiler.profilingEnabled = true;
|
||||||
|
profiler.clearProfiling();
|
||||||
|
profiler.startSection("root");
|
||||||
|
profiler.startSection("startup");
|
||||||
|
profiler.startSection("loadingcrawlfile");
|
||||||
start = System.currentTimeMillis();
|
start = System.currentTimeMillis();
|
||||||
log.info("Try to load crawlfile");
|
log.info("Try to load crawlfile");
|
||||||
if(crawlfile.exists()) {
|
if(crawlfile.exists()) {
|
||||||
try {
|
try {
|
||||||
Scanner in = new Scanner(crawlfile);
|
Scanner in = new Scanner(crawlfile);
|
||||||
boolean crawl = true;//section of file
|
//boolean crawl = true;//section of file
|
||||||
while(in.hasNextLine()) {
|
while(in.hasNextLine()) {
|
||||||
String line = in.nextLine();
|
String line = in.nextLine();
|
||||||
if(line == null) {
|
if(line == null) {
|
||||||
|
@ -103,13 +105,13 @@ public class Crawler implements Runnable {
|
||||||
} else {
|
} else {
|
||||||
if(!line.isEmpty()) {
|
if(!line.isEmpty()) {
|
||||||
if(line.equals("-")) {//section delimiter
|
if(line.equals("-")) {//section delimiter
|
||||||
crawl = false;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
if(crawl) {
|
//if(crawl) {
|
||||||
toCrawl.add(line);
|
toCrawl.add(line);
|
||||||
} else {
|
/*} else {
|
||||||
toknown.add(line);
|
toknown.add(line);
|
||||||
}
|
}*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -120,8 +122,7 @@ public class Crawler implements Runnable {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
profiler.endStartSection("populateThreads");
|
||||||
currentstate = "populate threads";
|
|
||||||
//populate threads
|
//populate threads
|
||||||
int threadcount = 4;
|
int threadcount = 4;
|
||||||
try {
|
try {
|
||||||
|
@ -137,24 +138,28 @@ public class Crawler implements Runnable {
|
||||||
threads.add(thr);
|
threads.add(thr);
|
||||||
thr.thread.start();
|
thr.thread.start();
|
||||||
}
|
}
|
||||||
currentstate = "delete Double";
|
profiler.endStartSection("deleteDouble");
|
||||||
long lastdoubledelete = System.currentTimeMillis();
|
long lastdoubledelete = System.currentTimeMillis();
|
||||||
db.deleteDouble();
|
//db.deleteDouble();
|
||||||
currentstate = "crawl";
|
profiler.endSection();//startup
|
||||||
while(crawl) {
|
profiler.endStartSection("main");
|
||||||
log.info("to Crawl: " + toCrawl.size() + " known: " + toknown.size() + " Time: " + dateform.format(new Date()));
|
boolean savedall = false;//ein 2. durch lauf, um wirklich alles zu speichern
|
||||||
|
while(crawl || savedall) {
|
||||||
|
log.info("to Crawl: " + toCrawl.size() + /*" known: " + toknown.size() +*/ " Time: " + dateform.format(new Date()));
|
||||||
|
if(!crawl)
|
||||||
|
savedall = true;
|
||||||
try {
|
try {
|
||||||
//fullfill request
|
//fullfill request
|
||||||
|
profiler.startSection("fullfill request");
|
||||||
while(!requested.isEmpty() && !toCrawl.isEmpty() && crawl) {
|
while(!requested.isEmpty() && !toCrawl.isEmpty() && crawl) {
|
||||||
log.info("fullfill request");
|
log.info("fullfill request");
|
||||||
currentstate = "fullfill requests";
|
|
||||||
send(requested.remove(0));
|
send(requested.remove(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
//kindof idle
|
//kindof idle
|
||||||
|
profiler.endStartSection("idle");
|
||||||
while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) {
|
while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) {
|
||||||
startup = 0;//stop startup count
|
startup = 0;//stop startup count
|
||||||
currentstate = "idle";
|
|
||||||
if((System.currentTimeMillis() - lastdoubledelete) / 1000 > 1800) {
|
if((System.currentTimeMillis() - lastdoubledelete) / 1000 > 1800) {
|
||||||
db.deleteDouble();
|
db.deleteDouble();
|
||||||
lastdoubledelete = System.currentTimeMillis();
|
lastdoubledelete = System.currentTimeMillis();
|
||||||
|
@ -169,13 +174,12 @@ public class Crawler implements Runnable {
|
||||||
// updateDB();
|
// updateDB();
|
||||||
}
|
}
|
||||||
//nothing left?
|
//nothing left?
|
||||||
if(toknown.isEmpty() && toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon
|
if(/*toknown.isEmpty() && */toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon
|
||||||
log.warn("nothing left to crawl");
|
log.warn("nothing left to crawl");
|
||||||
crawl = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//refil the tocrawl list.
|
//refil the tocrawl list.
|
||||||
if(!toknown.isEmpty()) {
|
/*if(!toknown.isEmpty()) {
|
||||||
//check in db for known videos
|
//check in db for known videos
|
||||||
log.info("Checking the DB");
|
log.info("Checking the DB");
|
||||||
currentstate = "get new tocrawl";
|
currentstate = "get new tocrawl";
|
||||||
|
@ -188,50 +192,69 @@ public class Crawler implements Runnable {
|
||||||
toCrawl.addAll(db.checkvideos(tocheck));
|
toCrawl.addAll(db.checkvideos(tocheck));
|
||||||
}
|
}
|
||||||
// listlock.writeLock().unlock();
|
// listlock.writeLock().unlock();
|
||||||
}
|
}
|
||||||
if(toknown.size() < threadcount * jobspeerthread * 20 && crawl) {
|
while(toknown.size() < threadcount * jobspeerthread * 20 && crawl) {
|
||||||
currentstate = "restoretemp";
|
currentstate = "restoretemp";
|
||||||
log.info("restoreTemp");
|
log.info("restoreTemp");
|
||||||
LinkedList<String> rest = db.restoreTemp();
|
LinkedList<String> rest = db.restoreTemp();
|
||||||
toknown.addAll(rest);
|
toknown.addAll(rest);
|
||||||
|
}*/
|
||||||
|
{
|
||||||
|
profiler.endStartSection("loadCrawl");
|
||||||
|
boolean joined = true;
|
||||||
|
while(toCrawl.size() < ( threadcount * jobspeerthread * 3) && crawl) {
|
||||||
|
if(joined) {
|
||||||
|
joined = false;
|
||||||
|
log.info("loadCrawl");
|
||||||
|
}
|
||||||
|
LinkedList<String> rest = db.restoreTemp();
|
||||||
|
toCrawl.addAll(rest);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//writing crawlfile
|
//writing crawlfile
|
||||||
|
profiler.endStartSection("writingcrawlfile");
|
||||||
log.info("Writing Crawlfile");
|
log.info("Writing Crawlfile");
|
||||||
currentstate = "writing crawlfile";
|
|
||||||
// listlock.writeLock().lock();
|
|
||||||
try {
|
try {
|
||||||
PrintWriter p = new PrintWriter(new BufferedWriter(new FileWriter(crawlfile)));
|
PrintWriter p = new PrintWriter(new BufferedWriter(new FileWriter(crawlfile)));
|
||||||
for(String t : toCrawl) {
|
for(String t : toCrawl) {
|
||||||
p.println(t);
|
p.println(t);
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
p.println("-");
|
p.println("-");
|
||||||
for(String t : toknown) {
|
for(String t : toknown) {
|
||||||
p.println(t);
|
p.println(t);
|
||||||
}
|
}*/
|
||||||
p.close();
|
p.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
log.error("Error writing crawlfile.", e);
|
log.error("Error writing crawlfile.", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
//get reports
|
//get reports
|
||||||
currentstate = "get report";
|
profiler.endStartSection("getreport");
|
||||||
log.info("get report");
|
log.info("get report");
|
||||||
int count = 0;
|
|
||||||
for (CrawlerThread crawlerThread : threads) {
|
for (CrawlerThread crawlerThread : threads) {
|
||||||
currentstate = "get report: " + crawlerThread.thread.getName();
|
String threadname = crawlerThread.thread.getName();
|
||||||
|
profiler.startSection("T" + threadname.substring(threadname.lastIndexOf('#')+1));
|
||||||
LinkedList<String>[] report = crawlerThread.report();
|
LinkedList<String>[] report = crawlerThread.report();
|
||||||
crawlcount+= report[0].size();
|
crawlcount+= report[0].size();
|
||||||
toSave.addAll(report[0]);
|
toSave.addAll(report[0]);
|
||||||
crawlerThread.crawled.clear();
|
crawlerThread.crawled.clear();
|
||||||
|
|
||||||
while(report[1].size() > 1) {//2 videos werden ggf. gelöscht ohne gesehen zu werden.
|
|
||||||
LinkedList<String> store = new LinkedList<>();
|
|
||||||
try {
|
|
||||||
while(!report[1].isEmpty() && store.size() < 50) {
|
|
||||||
store.add(report[1].removeFirst());
|
|
||||||
|
|
||||||
count++;
|
int count = 0;
|
||||||
|
while(report[1].size() > 1) {//2 videos werden ggf. gelöscht ohne gesehen zu werden.
|
||||||
|
LinkedList<String> store = null;
|
||||||
|
try {
|
||||||
|
if(report[1].size() <= 50) {
|
||||||
|
store = report[1];
|
||||||
|
count += report[1].size();
|
||||||
|
report[1].clear();
|
||||||
|
} else {
|
||||||
|
store = new LinkedList<>();
|
||||||
|
while(!report[1].isEmpty() && store.size() < 50) {
|
||||||
|
store.add(report[1].removeFirst());
|
||||||
|
count++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch(NoSuchElementException ignored) {//concurrentmodification fuckery
|
} catch(NoSuchElementException ignored) {//concurrentmodification fuckery
|
||||||
log.info("no suchelement bla");
|
log.info("no suchelement bla");
|
||||||
|
@ -239,10 +262,10 @@ public class Crawler implements Runnable {
|
||||||
db.storeTemp(store);
|
db.storeTemp(store);
|
||||||
}
|
}
|
||||||
log.info(count + " videos added.");
|
log.info(count + " videos added.");
|
||||||
crawlerThread.found.clear();
|
profiler.endSection();
|
||||||
crawlerThread.thread.interrupt();//free from lock
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
profiler.endStartSection("debug");
|
||||||
long runtimes = (System.currentTimeMillis() - start) / 1000;
|
long runtimes = (System.currentTimeMillis() - start) / 1000;
|
||||||
if(runtimes < 0)
|
if(runtimes < 0)
|
||||||
runtimes = 1;
|
runtimes = 1;
|
||||||
|
@ -250,7 +273,7 @@ public class Crawler implements Runnable {
|
||||||
Main.getMain().broadcastAdmin(vidps + "v/s " + crawlcount + " total V");
|
Main.getMain().broadcastAdmin(vidps + "v/s " + crawlcount + " total V");
|
||||||
|
|
||||||
//save to db
|
//save to db
|
||||||
currentstate = "save to DB";
|
profiler.endStartSection("save2DB");
|
||||||
log.info("save " + toSave.size() + " videos to DB.");
|
log.info("save " + toSave.size() + " videos to DB.");
|
||||||
while(!toSave.isEmpty()) {
|
while(!toSave.isEmpty()) {
|
||||||
LinkedList<String> videoids = new LinkedList<>();
|
LinkedList<String> videoids = new LinkedList<>();
|
||||||
|
@ -258,19 +281,26 @@ public class Crawler implements Runnable {
|
||||||
videoids.add(toSave.remove(0));
|
videoids.add(toSave.remove(0));
|
||||||
}
|
}
|
||||||
if(videoids.size() > 0) {
|
if(videoids.size() > 0) {
|
||||||
|
profiler.startSection("getinfo");
|
||||||
List<Video> videos = api.getInfos(videoids)[0];
|
List<Video> videos = api.getInfos(videoids)[0];
|
||||||
|
profiler.endStartSection("sendtoDB");
|
||||||
db.addVideos(videos);
|
db.addVideos(videos);
|
||||||
|
profiler.endSection();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
profiler.endSection();
|
||||||
|
|
||||||
//at the beginning there is maybe just one video to crawl, so keep it calm.
|
//at the beginning there is maybe just one video to crawl, so keep it calm.
|
||||||
if(startup > 0) {
|
if(startup > 0) {
|
||||||
|
profiler.startSection("startupsleep");
|
||||||
startup --;
|
startup --;
|
||||||
currentstate = "startup sleep";
|
|
||||||
log.info("startup sleep");
|
log.info("startup sleep");
|
||||||
try {
|
try {
|
||||||
Thread.sleep(2000);
|
Thread.sleep(2000);
|
||||||
} catch(InterruptedException e) {}
|
} catch(InterruptedException e) {}
|
||||||
|
finally {
|
||||||
|
profiler.endSection();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch(Throwable t) {
|
} catch(Throwable t) {
|
||||||
log.warn("exception in Crawler!", t);
|
log.warn("exception in Crawler!", t);
|
||||||
|
@ -283,10 +313,20 @@ public class Crawler implements Runnable {
|
||||||
Main.getMain().stop();
|
Main.getMain().stop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
currentstate = "delete Double";
|
profiler.endStartSection("cleanup");
|
||||||
db.deleteDouble();
|
profiler.startSection("deleteDouble");
|
||||||
currentstate = "stop DB";
|
//db.deleteDouble();
|
||||||
|
profiler.endStartSection("stopDB");
|
||||||
db.stop();
|
db.stop();
|
||||||
|
profiler.endSection();
|
||||||
|
profiler.endSection();//root
|
||||||
|
log.info("Profiler:");
|
||||||
|
//for (Result res : profiler.getProfilingData("root")) {
|
||||||
|
// log.info(res.profilerName + " " + res.usePercentage + "% total: " + res.usePercentage + "%");
|
||||||
|
//}
|
||||||
|
for(String s : profiler.getTreeView()) {
|
||||||
|
log.info(s);
|
||||||
|
}
|
||||||
|
|
||||||
//end
|
//end
|
||||||
long runtimes = (System.currentTimeMillis() - start) / 1000;
|
long runtimes = (System.currentTimeMillis() - start) / 1000;
|
||||||
|
@ -295,7 +335,7 @@ public class Crawler implements Runnable {
|
||||||
int runtimem = (int) (runtimes / 60);
|
int runtimem = (int) (runtimes / 60);
|
||||||
float vidps = (crawlcount / (float) runtimes);//videos per second
|
float vidps = (crawlcount / (float) runtimes);//videos per second
|
||||||
log.info("Crawling Stopped. Runtime: " + runtimem + "min and " + crawlcount + " videos crawled. ( " + vidps + " v/s )");
|
log.info("Crawling Stopped. Runtime: " + runtimem + "min and " + crawlcount + " videos crawled. ( " + vidps + " v/s )");
|
||||||
currentstate = "ended.";
|
Main.getMain().stopcallback();
|
||||||
}
|
}
|
||||||
|
|
||||||
public DB getDB() {
|
public DB getDB() {
|
||||||
|
@ -314,21 +354,27 @@ public class Crawler implements Runnable {
|
||||||
int runtimem = (int) (runtimes / 60);
|
int runtimem = (int) (runtimes / 60);
|
||||||
String out = "";
|
String out = "";
|
||||||
out += "ToCrawl: " + toCrawl.size();
|
out += "ToCrawl: " + toCrawl.size();
|
||||||
out += "\nToknown: " + toknown.size();
|
//out += "\nToknown: " + toknown.size();
|
||||||
out += "\nToSave: " + toSave.size();
|
out += "\nToSave: " + toSave.size();
|
||||||
out += "\nrequested: " + requested.size();
|
out += "\nrequested: " + requested.size();
|
||||||
out += "\nRandomBuffer: " + db.getRandomCount();
|
out += "\nRandomBuffer: " + db.getRandomCount();
|
||||||
out += "\nRuntime: " + runtimem + "min and " + crawlcount + " videos crawled. ( " + vidps + " v/s )";
|
out += "\nRuntime: " + runtimem + "min and " + crawlcount + " videos crawled. ( " + vidps + " v/s )";
|
||||||
out += "\nState: " + currentstate;
|
out += "\nprofiler: " + profiler.getNameOfLastSection();
|
||||||
out += "\nDBSize: " + db.getDBSize();
|
out += "\nDBSize: " + db.getDBSize();
|
||||||
out += "\nThread Nr, todo size, locked, requested, crawledsize, foundsize";
|
if(threads != null) {
|
||||||
for (int i = 0; i < threads.size(); i++) {
|
out += "\nThread Nr, todo size, requested, crawledsize, foundsize";
|
||||||
CrawlerThread thre = threads.get(i);
|
for (int i = 0; i < threads.size(); i++) {
|
||||||
out += "\n " + i + " " + thre.todo.size() + " " + thre.lockforreport + " " + thre.requested + " " + thre.crawled.size() + " " + thre.found.size();
|
CrawlerThread thre = threads.get(i);
|
||||||
|
out += "\n " + i + " " + (thre.lockforreport ? "\uD83D\uDD12" : "\uD83D\uDD13") + " " + thre.todo.size() + " " + thre.requested + " " + thre.crawled.size() + " " + thre.found.size();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public LinkedList<String> getProfiling() {
|
||||||
|
return profiler.getTreeView();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Updates old entrys of the DB. currently unused.
|
* Updates old entrys of the DB. currently unused.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -69,7 +69,12 @@ public class CrawlerThread implements Runnable {
|
||||||
*/
|
*/
|
||||||
LinkedList<String>[] report() {
|
LinkedList<String>[] report() {
|
||||||
lockforreport = true;
|
lockforreport = true;
|
||||||
return new LinkedList[] {(LinkedList) crawled.clone(), (LinkedList) found.clone()};
|
LinkedList[] out = new LinkedList[] {(LinkedList) crawled, (LinkedList) found};
|
||||||
|
crawled = new LinkedList<>();
|
||||||
|
found = new LinkedList<>();
|
||||||
|
lockforreport = false;
|
||||||
|
thread.interrupt();
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void crawl(String videoid) {
|
private void crawl(String videoid) {
|
||||||
|
|
|
@ -255,6 +255,8 @@ public class DB implements Runnable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void storeTemp(LinkedList<String> strings) {
|
public void storeTemp(LinkedList<String> strings) {
|
||||||
|
if(strings == null)
|
||||||
|
return;
|
||||||
if(!strings.isEmpty()) {
|
if(!strings.isEmpty()) {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for(String s : strings) {
|
for(String s : strings) {
|
||||||
|
|
|
@ -2,17 +2,21 @@ package de.mrbesen.youtubecrawler;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.lang.Thread.State;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.Scanner;
|
import java.util.Scanner;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import de.mrbesen.telegram.MessageBuilder;
|
||||||
import de.mrbesen.telegram.TelegramAPI;
|
import de.mrbesen.telegram.TelegramAPI;
|
||||||
import de.mrbesen.telegram.commands.CommandHandler;
|
import de.mrbesen.telegram.commands.CommandHandler;
|
||||||
import de.mrbesen.telegram.event.EventHandler;
|
import de.mrbesen.telegram.event.EventHandler;
|
||||||
import de.mrbesen.telegram.event.EventListener;
|
import de.mrbesen.telegram.event.EventListener;
|
||||||
import de.mrbesen.telegram.event.events.UserSendMessageEvent;
|
import de.mrbesen.telegram.event.events.UserSendMessageEvent;
|
||||||
|
import de.mrbesen.telegram.log.Log4JLog;
|
||||||
import de.mrbesen.telegram.objects.TUser;
|
import de.mrbesen.telegram.objects.TUser;
|
||||||
|
|
||||||
public class Main implements CommandHandler, EventListener{
|
public class Main implements CommandHandler, EventListener{
|
||||||
|
@ -20,12 +24,14 @@ public class Main implements CommandHandler, EventListener{
|
||||||
private ArrayList<String> admins = new ArrayList<>();//usernames of admins
|
private ArrayList<String> admins = new ArrayList<>();//usernames of admins
|
||||||
private String adminstr = null;
|
private String adminstr = null;
|
||||||
private long setadminstr = -1;
|
private long setadminstr = -1;
|
||||||
private static String abc = "abcdefghijklmnopqrstuvwxyz";
|
private static String abc = "abcdefghijklmnopqrstuvwxyz";//used for random string generation
|
||||||
private Logger log = Logger.getLogger(this.getClass().getName());
|
private Logger log = Logger.getLogger(this.getClass().getName());
|
||||||
private TelegramAPI tapi;
|
private TelegramAPI tapi;
|
||||||
private Thread mainthread;
|
private Thread mainthread;
|
||||||
private static Main main;
|
private static Main main;
|
||||||
private boolean startcrawler = true;
|
private boolean startcrawler = true;
|
||||||
|
private Crawler cra;
|
||||||
|
private Thread crawlerthread;
|
||||||
|
|
||||||
public static Main getMain() {
|
public static Main getMain() {
|
||||||
return main;
|
return main;
|
||||||
|
@ -38,16 +44,15 @@ public class Main implements CommandHandler, EventListener{
|
||||||
startcrawl = false;
|
startcrawl = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
main = new Main(startcrawl);
|
main = new Main(startcrawl);
|
||||||
main.run();
|
main.run();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Main(boolean startcra) {
|
public Main(boolean startcra) {
|
||||||
startcrawler = startcra;
|
startcrawler = startcra;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Crawler cra;
|
|
||||||
|
|
||||||
private void run() {
|
private void run() {
|
||||||
mainthread = Thread.currentThread();
|
mainthread = Thread.currentThread();
|
||||||
|
@ -61,9 +66,9 @@ public class Main implements CommandHandler, EventListener{
|
||||||
|
|
||||||
//starting crawler
|
//starting crawler
|
||||||
cra = new Crawler();
|
cra = new Crawler();
|
||||||
Thread t = new Thread(cra, "Crawler");
|
crawlerthread = new Thread(cra, "Crawler");
|
||||||
if(startcrawler) {
|
if(startcrawler) {
|
||||||
t.start();
|
crawlerthread.start();
|
||||||
}
|
}
|
||||||
|
|
||||||
//starting BOT API
|
//starting BOT API
|
||||||
|
@ -72,9 +77,11 @@ public class Main implements CommandHandler, EventListener{
|
||||||
tapi.getCommandManager().registerCommand("admin", this);
|
tapi.getCommandManager().registerCommand("admin", this);
|
||||||
tapi.getCommandManager().registerCommand("stats", this);
|
tapi.getCommandManager().registerCommand("stats", this);
|
||||||
tapi.getCommandManager().registerCommand("stop", this);
|
tapi.getCommandManager().registerCommand("stop", this);
|
||||||
|
tapi.getCommandManager().registerCommand("profiler", this);
|
||||||
tapi.getEventManager().registerEvent(this);
|
tapi.getEventManager().registerEvent(this);
|
||||||
tapi.setHelpText("Send the command /random to get a random video.");
|
tapi.setHelpText("Send the command /random to get a random video.");
|
||||||
tapi.setUpdateInterval(2000);
|
tapi.setUpdateInterval(2000);
|
||||||
|
tapi.setLog(new Log4JLog());
|
||||||
tapi.start();
|
tapi.start();
|
||||||
|
|
||||||
//load admins
|
//load admins
|
||||||
|
@ -94,10 +101,9 @@ public class Main implements CommandHandler, EventListener{
|
||||||
//CLI
|
//CLI
|
||||||
Scanner s = new Scanner(System.in);
|
Scanner s = new Scanner(System.in);
|
||||||
String in;
|
String in;
|
||||||
while((in= s.nextLine()) != null && t.isAlive()) {
|
while((in= s.nextLine()) != null && crawlerthread.isAlive()) {
|
||||||
if(in.equalsIgnoreCase("stop")) {
|
if(in.equalsIgnoreCase("stop")) {
|
||||||
stop();
|
stop();
|
||||||
break;
|
|
||||||
} else if(in.equalsIgnoreCase("add")) {
|
} else if(in.equalsIgnoreCase("add")) {
|
||||||
log.info("please enter ytid:");
|
log.info("please enter ytid:");
|
||||||
String id = s.nextLine().trim();
|
String id = s.nextLine().trim();
|
||||||
|
@ -113,31 +119,76 @@ public class Main implements CommandHandler, EventListener{
|
||||||
} else if(in.equalsIgnoreCase("crastop")) {
|
} else if(in.equalsIgnoreCase("crastop")) {
|
||||||
log.info("Stop crawler");
|
log.info("Stop crawler");
|
||||||
cra.stop();
|
cra.stop();
|
||||||
|
} else if(in.equalsIgnoreCase("profiler")) {
|
||||||
|
for(String profline : cra.getProfiling()) {
|
||||||
|
log.info(profline);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s.close();
|
s.close();
|
||||||
log.info("Terminated.");
|
log.info("Terminated.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String format(String in, int length) {
|
||||||
|
while(in.length() < length) {
|
||||||
|
in += ' ';
|
||||||
|
}
|
||||||
|
|
||||||
|
if(in.length() > length) {
|
||||||
|
in = in.substring(0, length);
|
||||||
|
}
|
||||||
|
return in;
|
||||||
|
}
|
||||||
|
|
||||||
public void stop() {
|
public void stop() {
|
||||||
log.info("Stop.");
|
log.info("Stop.");
|
||||||
cra.stop();
|
cra.stop();
|
||||||
log.info("cra stopped");
|
if(startcrawler) {
|
||||||
try {
|
new Thread(new Runnable() {
|
||||||
Thread.sleep(100);
|
@Override
|
||||||
} catch(InterruptedException e) {
|
public void run() {
|
||||||
e.printStackTrace();
|
int count = 0;
|
||||||
|
while(crawlerthread.isAlive()) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(20000);
|
||||||
|
} catch(InterruptedException ignored) {}
|
||||||
|
count = 0;
|
||||||
|
for(Thread t : Thread.getAllStackTraces().keySet()) {
|
||||||
|
String name = format(t.getName(), 15);
|
||||||
|
log.info(name + ":\t" + t.getState().name());
|
||||||
|
if(t.getState() == State.RUNNABLE) {
|
||||||
|
count ++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.info("count: " + count);
|
||||||
|
}
|
||||||
|
log.info("Stoped Shutdown Watchdog");
|
||||||
|
}
|
||||||
|
}, "Shutdown Watchdog").start();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void stopcallback() {
|
||||||
tapi.stop();
|
tapi.stop();
|
||||||
log.info("tapi stopped");
|
log.info("tapi stopped");
|
||||||
mainthread.interrupt();
|
mainthread.interrupt();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean onCommand(TUser sender, String cmd, String[] args) {
|
public boolean onCommand(TUser sender, String cmd, String[] args, JSONObject j) {
|
||||||
if(cmd.equals("random")) {
|
if(cmd.startsWith("random")) {
|
||||||
String ytid = cra.getDB().getRandom();
|
String ytid = cra.getDB().getRandom();
|
||||||
sender.sendMessage("https://youtube.com/watch?v=" + ytid);
|
int chatid = 0;
|
||||||
|
try {
|
||||||
|
//log.debug(j.toString());
|
||||||
|
chatid = j.getJSONObject("chat").getInt("id");
|
||||||
|
} catch(Throwable t) { t.printStackTrace(); }
|
||||||
|
//log.info("chatid: " + chatid);
|
||||||
|
if(chatid != 0) {
|
||||||
|
tapi.sendMessage(new MessageBuilder().setText("https://youtube.com/watch?v=" + ytid).setReciver(chatid).build());
|
||||||
|
} else {
|
||||||
|
sender.sendMessage("https://youtube.com/watch?v=" + ytid);
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
} else if(cmd.equals("admin")) {
|
} else if(cmd.equals("admin")) {
|
||||||
if(admins.contains(sender.getName())) {
|
if(admins.contains(sender.getName())) {
|
||||||
|
@ -153,14 +204,25 @@ public class Main implements CommandHandler, EventListener{
|
||||||
sender.sendMessage(cra.getStats());
|
sender.sendMessage(cra.getStats());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
} else if(cmd.equals("stop")) {
|
} else if(cmd.equals("profiler")) {
|
||||||
|
if(admins.contains(sender.getName())) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for(String s : cra.getProfiling()) {
|
||||||
|
sb.append(s).append('\n');
|
||||||
|
}
|
||||||
|
sender.sendMessage(sb.toString());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*else if(cmd.equals("stop")) {
|
||||||
if(admins.contains(sender.getName())) {
|
if(admins.contains(sender.getName())) {
|
||||||
stop();
|
stop();
|
||||||
sender.sendMessage("Stop.");
|
sender.sendMessage("Stop.");
|
||||||
log.info("Stopped via Telegram by " + sender.getFirstName());
|
log.info("Stopped via Telegram by " + sender.getFirstName());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}*/
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,168 @@
|
||||||
|
package de.mrbesen.youtubecrawler;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
|
/**
 * Hierarchical wall-clock profiler.
 *
 * <p>Sections are opened with {@link #startSection(String)} and closed with
 * {@link #endSection()}. Nested sections are identified by their dot-separated
 * path (for example {@code root.crawl.db}); the time spent in every path is
 * accumulated in nanoseconds. Nothing is recorded while
 * {@link #profilingEnabled} is {@code false}.
 *
 * <p>The class performs no synchronization of its own.
 */
public class Profiler {

    /** Maximum nesting depth rendered by {@link #getTreeView()}. */
    private static final int MAX_TREE_DEPTH = 20;

    /** Stack of the full paths of all currently open sections. */
    private final List<String> sectionList = new ArrayList<>();

    /** Stack of {@link System#nanoTime()} start stamps, parallel to {@link #sectionList}. */
    private final List<Long> timestampList = new ArrayList<>();

    /** Flag profiling enabled */
    public boolean profilingEnabled;

    /** Current profiling section */
    private String profilingSection = "";

    /** Accumulated nanoseconds per full section path. */
    private final Map<String, Long> profilingMap = new HashMap<>();

    /** {@link System#nanoTime()} of the first recorded section start, or -1 if none yet. */
    private long start = -1;

    /**
     * Clear profiling: drops all accumulated times and all open sections and
     * restarts the reference window used for percentages.
     */
    public void clearProfiling() {
        this.profilingMap.clear();
        this.profilingSection = "";
        this.sectionList.clear();
        // Keep both stacks in sync and restart the measuring window,
        // otherwise later results would be paired with stale data.
        this.timestampList.clear();
        this.start = -1;
    }

    /**
     * Start section. The new section becomes a child of the currently open one.
     *
     * @param name simple name of the section; it is appended to the current
     *             path with a {@code '.'} separator
     */
    public void startSection(String name) {
        if (!this.profilingEnabled) {
            return;
        }
        long now = System.nanoTime();
        if (this.start == -1) {
            this.start = now;
        }
        this.profilingSection = this.profilingSection.isEmpty()
                ? name
                : this.profilingSection + "." + name;
        this.sectionList.add(this.profilingSection);
        this.timestampList.add(now);
    }

    /**
     * End section. Adds the elapsed time of the innermost open section to its
     * accumulated total and makes its parent the current section again.
     * Does nothing when profiling is disabled or no section is open.
     */
    public void endSection() {
        if (!this.profilingEnabled || this.sectionList.isEmpty() || this.timestampList.isEmpty()) {
            return;
        }
        long now = System.nanoTime();
        long startedAt = this.timestampList.remove(this.timestampList.size() - 1);
        this.sectionList.remove(this.sectionList.size() - 1);
        long elapsed = now - startedAt;

        // profilingSection still names the section that is being closed here.
        Long previous = this.profilingMap.get(this.profilingSection);
        this.profilingMap.put(this.profilingSection, previous == null ? elapsed : previous + elapsed);

        this.profilingSection = this.sectionList.isEmpty()
                ? ""
                : this.sectionList.get(this.sectionList.size() - 1);
    }

    /**
     * Counts the path segments of a section path (number of dots plus one).
     */
    private int sectioncount(String profilername) {
        int count = 1;
        for (int i = 0; i < profilername.length(); i++) {
            if (profilername.charAt(i) == '.') {
                count++;
            }
        }
        return count;
    }

    /**
     * Returns the direct child sections of the given section.
     *
     * <p>Every call additionally scales all accumulated times by 999/1000.
     *
     * @param profilerName full path of the parent section
     * @return one result per direct child, in no particular order; an empty
     *         list when profiling is disabled
     */
    public List<Profiler.Result> getProfilingData(String profilerName) {
        if (!this.profilingEnabled) {
            return Collections.<Profiler.Result>emptyList();
        }
        List<Profiler.Result> out = collectChildren(profilerName, System.nanoTime() - this.start);
        decay();
        return out;
    }

    /**
     * Collects the direct children of {@code parent} without modifying any state.
     *
     * @param parent    full path of the parent section
     * @param totalTime reference duration in nanoseconds for the percentage
     */
    private List<Profiler.Result> collectChildren(String parent, long totalTime) {
        List<Profiler.Result> children = new ArrayList<>();
        // The separator is part of the prefix so that "rootX.a" is not
        // mistaken for a child of "root".
        String prefix = parent + ".";
        int childDepth = sectioncount(parent) + 1;
        for (Map.Entry<String, Long> entry : this.profilingMap.entrySet()) {
            String path = entry.getKey();
            if (path.startsWith(prefix) && sectioncount(path) == childDepth) {
                long sectionTime = entry.getValue();
                double percentage = (double) sectionTime * 100.0D / (double) totalTime;
                children.add(new Profiler.Result(path, percentage, sectionTime));
            }
        }
        return children;
    }

    /** Scales every accumulated time by 999/1000. */
    private void decay() {
        for (Map.Entry<String, Long> entry : this.profilingMap.entrySet()) {
            entry.setValue(entry.getValue() * 999L / 1000L);
        }
    }

    /**
     * Renders all sections below the top-level section {@code "root"} as an
     * indented tree, one line per section, children indented by one tab per
     * level.
     *
     * <p>The whole tree is rendered from one consistent state; the accumulated
     * times are scaled by 999/1000 once afterwards.
     *
     * @return the rendered lines; empty when profiling is disabled, nothing
     *         was recorded below {@code "root"}, or memory ran out while rendering
     */
    public LinkedList<String> getTreeView() {
        LinkedList<String> lines = new LinkedList<>();
        if (!this.profilingEnabled) {
            return lines;
        }
        try {
            appendTree("root", "", System.nanoTime() - this.start, 0, lines);
            decay();
            return lines;
        } catch (OutOfMemoryError e) {
            // Best effort: report the problem and hand back an empty view.
            e.printStackTrace();
            return new LinkedList<>();
        }
    }

    /**
     * Appends the lines for all descendants of {@code name} to {@code out}.
     *
     * @param name      full path of the section whose children are rendered
     * @param leading   indentation prefix for the children of {@code name}
     * @param totalTime reference duration in nanoseconds for the percentages
     * @param depth     nesting level of the children relative to the tree root
     * @param out       list receiving the rendered lines
     */
    private void appendTree(String name, String leading, long totalTime, int depth, List<String> out) {
        // Explicit depth limit; independent of how deep the caller's own stack is.
        if (depth >= MAX_TREE_DEPTH) {
            return;
        }
        for (Profiler.Result res : collectChildren(name, totalTime)) {
            out.add(leading + res.profilerName + " " + (res.time / 10000000) / 100D + "s "
                    + form(res.totalUsePercentage) + "% ");
            appendTree(res.profilerName, leading + "\t", totalTime, depth + 1, out);
        }
    }

    /**
     * Formats a value truncated to three decimal places.
     */
    private String form(double d) {
        // Divide by a double: an integer division would drop the decimals again.
        return (((long) (d * 1000)) / 1000D) + "";
    }

    /**
     * End current section and start a new section
     *
     * @param name simple name of the section to start
     */
    public void endStartSection(String name) {
        this.endSection();
        this.startSection(name);
    }

    /**
     * Returns the full path of the innermost open section, or
     * {@code "[UNKNOWN]"} when no section is open.
     */
    public String getNameOfLastSection() {
        return this.sectionList.isEmpty()
                ? "[UNKNOWN]"
                : this.sectionList.get(this.sectionList.size() - 1);
    }

    /**
     * Accumulated measurement of a single section.
     */
    public static final class Result {

        /** Share of the section in the time since the first recorded section start, in percent. */
        public double totalUsePercentage;

        /** Full dot-separated path of the section. */
        public String profilerName;

        /** Accumulated time of the section in nanoseconds. */
        public long time;

        public Result(String profilerName, double totalUsePercentage, long time) {
            this.profilerName = profilerName;
            this.totalUsePercentage = totalUsePercentage;
            this.time = time;
        }
    }
}
|
|
@ -3,7 +3,6 @@ package de.mrbesen.youtubecrawler;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintWriter;
|
||||||
import java.net.InetAddress;
|
import java.net.InetAddress;
|
||||||
import java.net.InetSocketAddress;
|
|
||||||
import java.net.ServerSocket;
|
import java.net.ServerSocket;
|
||||||
import java.net.Socket;
|
import java.net.Socket;
|
||||||
import java.net.SocketTimeoutException;
|
import java.net.SocketTimeoutException;
|
||||||
|
|
|
@ -107,6 +107,7 @@ public class YoutubeAPI {
|
||||||
//Seconds
|
//Seconds
|
||||||
v.length += Integer.parseInt(timeparts[timeparts.length-1]);
|
v.length += Integer.parseInt(timeparts[timeparts.length-1]);
|
||||||
} catch(NumberFormatException e) {//failed: P6DT17H59M53S and P15W3DT4H1M11S and P1W2DT20H47M55S video id: 1NPyC0psMaI and P2W2DT23H58M58S video id: Jd9KjbRxhN4 For input string: "W2DT23"
|
} catch(NumberFormatException e) {//failed: P6DT17H59M53S and P15W3DT4H1M11S and P1W2DT20H47M55S video id: 1NPyC0psMaI and P2W2DT23H58M58S video id: Jd9KjbRxhN4 For input string: "W2DT23"
|
||||||
|
Main.getMain().broadcastAdmin(removeunwanted(split[1]) + " video id: " + v.id);
|
||||||
log.warn("Error saving the time string: " + removeunwanted(split[1]) + " video id: " + v.id, e);
|
log.warn("Error saving the time string: " + removeunwanted(split[1]) + " video id: " + v.id, e);
|
||||||
}
|
}
|
||||||
} else if(split[0].equals("publishedAt")) {
|
} else if(split[0].equals("publishedAt")) {
|
||||||
|
|
Loading…
Reference in New Issue