kleine bugfixes, admins file

This commit is contained in:
MrBesen 2018-09-11 00:05:10 +02:00
parent 4917369b34
commit b67952ce88
3 changed files with 171 additions and 119 deletions

View File

@ -137,119 +137,137 @@ public class Crawler implements Runnable {
} }
while(crawl) { while(crawl) {
log.info("to Crawl: " + toCrawl.size() + " known: " + toknown.size() + " Time: " + dateform.format(new Date())); log.info("to Crawl: " + toCrawl.size() + " known: " + toknown.size() + " Time: " + dateform.format(new Date()));
//fullfill request
while(!requested.isEmpty() && !toCrawl.isEmpty() && crawl) {
log.info("fullfill request");
currentstate = "fullfill requests";
send(requested.remove(0));
}
//kindof idle
while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) {
startup = 0;//stop startup count
currentstate = "idle";
Thread.yield();
try {
Thread.sleep(100);
} catch(InterruptedException ignored) {
break;
}
// updateDB();
}
//nothing left?
if(toknown.isEmpty() && toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon
log.warn("nothing left to crawl");
crawl = false;
}
//refil the tocrawl list.
if(!toknown.isEmpty()) {
//check in db for known videos
log.info("Checking the DB");
currentstate = "get new tocrawl";
// listlock.writeLock().lock();
while(toCrawl.size() < jobspeerthread * threads.size() * 2 && crawl && !toknown.isEmpty()) {
LinkedList<String> tocheck = new LinkedList<>();
for(int i = 0; i < toknown.size() && i < maxvideostotest; i++) {
tocheck.add(toknown.removeFirst());
}
toCrawl.addAll(db.checkvideos(tocheck));
}
// listlock.writeLock().unlock();
}
if(toknown.size() < threadcount * jobspeerthread * 20 && crawl) {
currentstate = "restoretemp";
log.info("restoreTemp");
LinkedList<String> rest = db.restoreTemp();
toknown.addAll(rest);
}
//writing crawlfile
log.info("Writing Crawlfile");
currentstate = "writing crawlfile";
// listlock.writeLock().lock();
try { try {
PrintWriter p = new PrintWriter(new BufferedWriter(new FileWriter(crawlfile))); //fullfill request
for(String t : toCrawl) { while(!requested.isEmpty() && !toCrawl.isEmpty() && crawl) {
p.println(t); log.info("fullfill request");
currentstate = "fullfill requests";
send(requested.remove(0));
} }
p.println("-");
for(String t : toknown) {
p.println(t);
}
p.close();
} catch (IOException e) {
log.error("Error writing crawlfile.", e);
}
//get reports //kindof idle
currentstate = "get report"; while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) {
log.info("get report"); startup = 0;//stop startup count
int count = 0; currentstate = "idle";
for (CrawlerThread crawlerThread : threads) { Thread.yield();
LinkedList<String>[] report = crawlerThread.report();
crawlcount+= report[0].size();
toSave.addAll(report[0]);
crawlerThread.crawled.clear();
while(report[1].size() > 0) {
LinkedList<String> store = new LinkedList<>();
try { try {
while(!report[1].isEmpty() && store.size() < 50) { Thread.sleep(100);
store.add(report[1].removeFirst()); } catch(InterruptedException ignored) {
break;
count++;
}
} catch(NoSuchElementException ignored) {//concurrentmodification fuckery
} }
db.storeTemp(store); // updateDB();
}
//nothing left?
if(toknown.isEmpty() && toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon
log.warn("nothing left to crawl");
crawl = false;
} }
log.info(count + " videos added.");
crawlerThread.found.clear();
}
//save to db //refil the tocrawl list.
currentstate = "save to DB"; if(!toknown.isEmpty()) {
log.info("save " + toSave.size() + " videos to DB."); //check in db for known videos
while(!toSave.isEmpty()) { log.info("Checking the DB");
LinkedList<String> videoids = new LinkedList<>(); currentstate = "get new tocrawl";
for(int i = 0; i < 50 && !toSave.isEmpty(); i++) { // listlock.writeLock().lock();
videoids.add(toSave.remove(0)); while(toCrawl.size() < jobspeerthread * threads.size() * 2 && crawl && !toknown.isEmpty()) {
LinkedList<String> tocheck = new LinkedList<>();
for(int i = 0; i < toknown.size() && i < maxvideostotest; i++) {
tocheck.add(toknown.removeFirst());
}
toCrawl.addAll(db.checkvideos(tocheck));
}
// listlock.writeLock().unlock();
}
if(toknown.size() < threadcount * jobspeerthread * 20 && crawl) {
currentstate = "restoretemp";
log.info("restoreTemp");
LinkedList<String> rest = db.restoreTemp();
toknown.addAll(rest);
} }
if(videoids.size() > 0) {
List<Video> videos = api.getInfos(videoids)[0];
db.addVideos(videos);
}
}
//at the beginning there is maybe just one video to crawl, so keep it calm. //writing crawlfile
if(startup > 0) { log.info("Writing Crawlfile");
startup --; currentstate = "writing crawlfile";
currentstate = "startup sleep"; // listlock.writeLock().lock();
log.info("startup sleep");
try { try {
Thread.sleep(2000); PrintWriter p = new PrintWriter(new BufferedWriter(new FileWriter(crawlfile)));
} catch(InterruptedException e) {} for(String t : toCrawl) {
p.println(t);
}
p.println("-");
for(String t : toknown) {
p.println(t);
}
p.close();
} catch (IOException e) {
log.error("Error writing crawlfile.", e);
}
//get reports
currentstate = "get report";
log.info("get report");
int count = 0;
for (CrawlerThread crawlerThread : threads) {
LinkedList<String>[] report = crawlerThread.report();
crawlcount+= report[0].size();
toSave.addAll(report[0]);
crawlerThread.crawled.clear();
while(report[1].size() > 15) {
LinkedList<String> store = new LinkedList<>();
try {
while(!report[1].isEmpty() && store.size() < 50) {
store.add(report[1].removeFirst());
count++;
}
} catch(NoSuchElementException ignored) {//concurrentmodification fuckery
log.info("no suchelement bla");
}
db.storeTemp(store);
}
log.info(count + " videos added.");
crawlerThread.found.clear();
}
db.deleteDouble();
long runtimes = (System.currentTimeMillis() - start) / 1000;
if(runtimes < 0)
runtimes = 1;
float vidps = (crawlcount / (float) runtimes);//videos per second
Main.getMain().broadcastAdmin(vidps + "v/s " + crawlcount + " total V");
//save to db
currentstate = "save to DB";
log.info("save " + toSave.size() + " videos to DB.");
while(!toSave.isEmpty()) {
LinkedList<String> videoids = new LinkedList<>();
for(int i = 0; i < 50 && !toSave.isEmpty(); i++) {
videoids.add(toSave.remove(0));
}
if(videoids.size() > 0) {
List<Video> videos = api.getInfos(videoids)[0];
db.addVideos(videos);
}
}
//at the beginning there is maybe just one video to crawl, so keep it calm.
if(startup > 0) {
startup --;
currentstate = "startup sleep";
log.info("startup sleep");
try {
Thread.sleep(2000);
} catch(InterruptedException e) {}
}
} catch(Throwable t) {
log.warn("exception in Crawler!", t);
StringBuilder sb = new StringBuilder();
for(StackTraceElement elem : t.getStackTrace()) {
sb.append(elem.getFileName() + "(").append(elem.getMethodName() + ":").append(elem.getLineNumber() + ")\n");
}
Main.getMain().broadcastAdmin("Excpetion in crawler: " + t.toString() + "\n" + sb.toString() );
crawl = false;
Main.getMain().stop();
} }
} }
@ -270,12 +288,12 @@ public class Crawler implements Runnable {
return new Video(); return new Video();
} }
public String printStats() { public String getStats() {
long runtimes = (System.currentTimeMillis() - start) / 1000; long runtimes = (System.currentTimeMillis() - start) / 1000;
if(runtimes < 0) if(runtimes < 0)
runtimes = 1; runtimes = 1;
int runtimem = (int) (runtimes / 60);
float vidps = (crawlcount / (float) runtimes);//videos per second float vidps = (crawlcount / (float) runtimes);//videos per second
int runtimem = (int) (runtimes / 60);
String out = ""; String out = "";
out += "ToCrawl: " + toCrawl.size(); out += "ToCrawl: " + toCrawl.size();
out += "\nToknown: " + toknown.size(); out += "\nToknown: " + toknown.size();

View File

@ -43,7 +43,7 @@ public class DB {
con.setCatalog(db); con.setCatalog(db);
update("CREATE TABLE `videos` (`id` varchar(13) NOT NULL,`length` int(11) NOT NULL,`created` int(11) NOT NULL,`langcode` varchar(3) NOT NULL DEFAULT 'en',`category` int(11) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `id_UNIQUE` (`id`)) ENGINE=InnoDB DEFAULT CHARSET=latin1;"); update("CREATE TABLE `videos` (`id` varchar(13) NOT NULL,`length` int(11) NOT NULL,`created` int(11) NOT NULL,`langcode` varchar(3) NOT NULL DEFAULT 'en',`category` int(11) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `id_UNIQUE` (`id`)) ENGINE=InnoDB DEFAULT CHARSET=latin1;");
update("CREATE TABLE `temp` ( `ytid` varchar(13) NOT NULL COMMENT 'a Table to store Video ids, when they are found to process them later', PRIMARY KEY (`ytid`), UNIQUE KEY `ytid_UNIQUE` (`ytid`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;"); update("CREATE TABLE `temp` ( `ytid` varchar(13) NOT NULL COMMENT 'a Table to store Video ids, when they are found to process them later', PRIMARY KEY (`ytid`), UNIQUE KEY `ytid_UNIQUE` (`ytid`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;");
log.info("Database is set up!"); log.info("Database is set up!");
} }
} catch (SQLException e) { } catch (SQLException e) {
@ -97,7 +97,8 @@ public class DB {
if(i > 0) if(i > 0)
sb.append(','); sb.append(',');
Video v = input.get(i); Video v = input.get(i);
sb.append("('").append(v.id).append("','").append(v.length).append("','").append(v.created).append("','").append(v.languageCode).append("','").append(v.categorie).append("','").append(v.title).append("','").append(v.channel).append("','").append(v.tags).append("') "); if(v != null)
sb.append("('").append(v.id).append("','").append(v.length).append("','").append(v.created).append("','").append(v.languageCode).append("','").append(v.categorie).append("','").append(v.title).append("','").append(v.channel).append("','").append(v.tags).append("') ");
} }
if(sb.length() > 2) { if(sb.length() > 2) {
String qu = "INSERT IGNORE INTO `ytcrawler`.`videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + sb.toString(); String qu = "INSERT IGNORE INTO `ytcrawler`.`videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + sb.toString();
@ -195,18 +196,22 @@ public class DB {
} }
public LinkedList<String> restoreTemp() { public LinkedList<String> restoreTemp() {
ResultSet res = query("SELECT * FROM `ytcrawler`.`temp` LIMIT 0,500;"); ResultSet res = query("SELECT * FROM `ytcrawler`.`temp` LIMIT 500;");
LinkedList<String> out = new LinkedList<>(); LinkedList<String> out = new LinkedList<>();
log.info("RestoreTemp"); log.info("RestoreTemp");
try { try {
while(res.next()) { while(res.next()) {
out.add(res.getString(1)); out.add(res.getString(1));
} }
update("DELETE FROM `ytcrawler`.`temp` LIMIT 0,500;"); update("DELETE FROM `ytcrawler`.`temp` LIMIT 500;");
} catch (Exception e) {} } catch (Exception e) {}
return out; return out;
} }
/**
 * Invokes the {@code ytcrawler.deletedouble} stored procedure by passing the
 * call statement to {@code update}.
 * NOTE(review): presumably the procedure removes duplicate entries - confirm
 * against the procedure definition in the database.
 */
public void deleteDouble() {
    final String procedureCall = "call ytcrawler.deletedouble();";
    update(procedureCall);
}
public void storeTemp(LinkedList<String> strings) { public void storeTemp(LinkedList<String> strings) {
if(!strings.isEmpty()) { if(!strings.isEmpty()) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();

View File

@ -1,6 +1,7 @@
package de.mrbesen.youtubecrawler; package de.mrbesen.youtubecrawler;
import java.io.File; import java.io.File;
import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Random; import java.util.Random;
import java.util.Scanner; import java.util.Scanner;
@ -16,16 +17,22 @@ import de.mrbesen.telegram.objects.TUser;
public class Main implements CommandHandler, EventListener{ public class Main implements CommandHandler, EventListener{
private ArrayList<TUser> admins = new ArrayList<>(); private ArrayList<String> admins = new ArrayList<>();//usernames of admins
private String adminstr = null; private String adminstr = null;
private long setadminstr = -1; private long setadminstr = -1;
private static String abc = "abcdefghijklmnopqrstuvwxyz"; private static String abc = "abcdefghijklmnopqrstuvwxyz";
private Logger log = Logger.getLogger(this.getClass().getName()); private Logger log = Logger.getLogger(this.getClass().getName());
private TelegramAPI tapi; private TelegramAPI tapi;
private Thread mainthread; private Thread mainthread;
private static Main main;
/**
 * Gives access to the shared {@code Main} instance held in the static
 * {@code main} field.
 *
 * @return the stored instance; {@code null} as long as the field has not been assigned
 */
public static Main getMain() {
    return Main.main;
}
public static void main(String[] args) { public static void main(String[] args) {
new Main().run(); main = new Main();
main.run();
} }
private Crawler cra; private Crawler cra;
@ -53,8 +60,23 @@ public class Main implements CommandHandler, EventListener{
tapi.getCommandManager().registerCommand("stop", this); tapi.getCommandManager().registerCommand("stop", this);
tapi.getEventManager().registerEvent(this); tapi.getEventManager().registerEvent(this);
tapi.setHelpText("Send the command /random to get a random video."); tapi.setHelpText("Send the command /random to get a random video.");
tapi.setUpdateInterval(25000);
tapi.start(); tapi.start();
//load admins
Log.l.info("Loading admins.");
try {
Scanner scan = new Scanner(new File("admins"));
while(scan.hasNext()) {
String line = scan.nextLine().trim();
if(!line.isEmpty()) {
admins.add(line);
}
}
scan.close();
} catch (IOException | NumberFormatException | ArrayIndexOutOfBoundsException e) {
e.printStackTrace();
}
//CLI //CLI
Scanner s = new Scanner(System.in); Scanner s = new Scanner(System.in);
String in; String in;
@ -71,7 +93,7 @@ public class Main implements CommandHandler, EventListener{
} }
} else if(in.equalsIgnoreCase("stats")) { } else if(in.equalsIgnoreCase("stats")) {
log.info("Getting Stats"); log.info("Getting Stats");
for(String line : cra.printStats().split("\n")) { for(String line : cra.getStats().split("\n")) {
log.info(line); log.info(line);
} }
} }
@ -80,7 +102,8 @@ public class Main implements CommandHandler, EventListener{
log.info("Terminated."); log.info("Terminated.");
} }
private void stop() { public void stop() {
log.info("Stop.");
tapi.stop(); tapi.stop();
cra.stop(); cra.stop();
mainthread.interrupt(); mainthread.interrupt();
@ -93,7 +116,7 @@ public class Main implements CommandHandler, EventListener{
sender.sendMessage("https://youtube.com/watch?v=" + ytid); sender.sendMessage("https://youtube.com/watch?v=" + ytid);
return true; return true;
} else if(cmd.equals("admin")) { } else if(cmd.equals("admin")) {
if(admins.contains(sender)) { if(admins.contains(sender.getName())) {
sender.sendMessage("You are admin."); sender.sendMessage("You are admin.");
return true; return true;
} else { } else {
@ -102,12 +125,12 @@ public class Main implements CommandHandler, EventListener{
log.info("Adminstr: " + adminstr); log.info("Adminstr: " + adminstr);
} }
} else if(cmd.equals("stats")) { } else if(cmd.equals("stats")) {
if(admins.contains(sender)) { if(admins.contains(sender.getName())) {
sender.sendMessage(cra.printStats()); sender.sendMessage(cra.getStats());
return true; return true;
} }
} else if(cmd.equals("stop")) { } else if(cmd.equals("stop")) {
if(admins.contains(sender)) { if(admins.contains(sender.getName())) {
stop(); stop();
sender.sendMessage("Stop."); sender.sendMessage("Stop.");
log.info("Stopped via Telegram by " + sender.getFirstName()); log.info("Stopped via Telegram by " + sender.getFirstName());
@ -117,6 +140,12 @@ public class Main implements CommandHandler, EventListener{
return false; return false;
} }
/**
 * Sends the given message to every entry of the {@code admins} list.
 * <p>
 * Delivery is best effort: a runtime failure while resolving or messaging one
 * admin is logged and the remaining admins are still notified. This keeps a
 * failed notification from propagating into the caller.
 *
 * @param msg the text to send to each admin
 */
public void broadcastAdmin(String msg) {
    for(String admin : admins) {
        try {
            tapi.getUser(admin).sendMessage(msg);
        } catch(RuntimeException e) {
            // NOTE(review): this also covers the case that getUser(..) yields no user for the
            // stored name (assumption - confirm what the Telegram API returns for unknown names).
            log.info("Could not send broadcast to admin " + admin + ": " + e);
        }
    }
}
private String getRandomStr(int length) { private String getRandomStr(int length) {
Random rand = new Random(); Random rand = new Random();
String out = ""; String out = "";
@ -132,11 +161,11 @@ public class Main implements CommandHandler, EventListener{
if(e.getMessage() != null && (System.currentTimeMillis() - setadminstr) / 1000 < 60) { if(e.getMessage() != null && (System.currentTimeMillis() - setadminstr) / 1000 < 60) {
if(e.getMessage().getText() != null) { if(e.getMessage().getText() != null) {
if(e.getMessage().getText().equals(adminstr)) { if(e.getMessage().getText().equals(adminstr)) {
admins.add(e.getUser()); admins.add(e.getUser().getName());
e.getMessage().reply("You are now Admin!"); e.getMessage().reply("You are now Admin!");
adminstr = null; adminstr = null;
setadminstr = -1; setadminstr = -1;
log.info(e.getUser().getFirstName() + " is now Admin!"); log.info(e.getUser().getID() + " " + e.getUser().getName() + " " + e.getUser().getFirstName() + " is now Admin!");
} }
} }
} }