kleine bugfixes, admins file

This commit is contained in:
MrBesen 2018-09-11 00:05:10 +02:00
parent 4917369b34
commit b67952ce88
3 changed files with 171 additions and 119 deletions

View File

@ -137,119 +137,137 @@ public class Crawler implements Runnable {
}
while(crawl) {
log.info("to Crawl: " + toCrawl.size() + " known: " + toknown.size() + " Time: " + dateform.format(new Date()));
//fulfill pending requests
while(!requested.isEmpty() && !toCrawl.isEmpty() && crawl) {
log.info("fullfill request");
currentstate = "fullfill requests";
send(requested.remove(0));
}
//kind of idle: enough jobs are queued and no request is pending
while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) {
startup = 0;//stop startup count
currentstate = "idle";
Thread.yield();
try {
Thread.sleep(100);
} catch(InterruptedException ignored) {
break;
}
// updateDB();
}
//nothing left?
if(toknown.isEmpty() && toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon
log.warn("nothing left to crawl");
crawl = false;
}
//refill the toCrawl list.
if(!toknown.isEmpty()) {
//check in db for known videos
log.info("Checking the DB");
currentstate = "get new tocrawl";
// listlock.writeLock().lock();
while(toCrawl.size() < jobspeerthread * threads.size() * 2 && crawl && !toknown.isEmpty()) {
LinkedList<String> tocheck = new LinkedList<>();
for(int i = 0; i < toknown.size() && i < maxvideostotest; i++) {
tocheck.add(toknown.removeFirst());
}
toCrawl.addAll(db.checkvideos(tocheck));
}
// listlock.writeLock().unlock();
}
if(toknown.size() < threadcount * jobspeerthread * 20 && crawl) {
currentstate = "restoretemp";
log.info("restoreTemp");
LinkedList<String> rest = db.restoreTemp();
toknown.addAll(rest);
}
//writing crawlfile
log.info("Writing Crawlfile");
currentstate = "writing crawlfile";
// listlock.writeLock().lock();
try {
PrintWriter p = new PrintWriter(new BufferedWriter(new FileWriter(crawlfile)));
for(String t : toCrawl) {
p.println(t);
//fulfill pending requests
while(!requested.isEmpty() && !toCrawl.isEmpty() && crawl) {
log.info("fullfill request");
currentstate = "fullfill requests";
send(requested.remove(0));
}
p.println("-");
for(String t : toknown) {
p.println(t);
}
p.close();
} catch (IOException e) {
log.error("Error writing crawlfile.", e);
}
//get reports
currentstate = "get report";
log.info("get report");
int count = 0;
for (CrawlerThread crawlerThread : threads) {
LinkedList<String>[] report = crawlerThread.report();
crawlcount+= report[0].size();
toSave.addAll(report[0]);
crawlerThread.crawled.clear();
while(report[1].size() > 0) {
LinkedList<String> store = new LinkedList<>();
//kind of idle: enough jobs are queued and no request is pending
while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) {
startup = 0;//stop startup count
currentstate = "idle";
Thread.yield();
try {
while(!report[1].isEmpty() && store.size() < 50) {
store.add(report[1].removeFirst());
count++;
}
} catch(NoSuchElementException ignored) {//concurrentmodification fuckery
Thread.sleep(100);
} catch(InterruptedException ignored) {
break;
}
db.storeTemp(store);
// updateDB();
}
//nothing left?
if(toknown.isEmpty() && toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon
log.warn("nothing left to crawl");
crawl = false;
}
log.info(count + " videos added.");
crawlerThread.found.clear();
}
//save to db
currentstate = "save to DB";
log.info("save " + toSave.size() + " videos to DB.");
while(!toSave.isEmpty()) {
LinkedList<String> videoids = new LinkedList<>();
for(int i = 0; i < 50 && !toSave.isEmpty(); i++) {
videoids.add(toSave.remove(0));
//refill the toCrawl list.
if(!toknown.isEmpty()) {
//check in db for known videos
log.info("Checking the DB");
currentstate = "get new tocrawl";
// listlock.writeLock().lock();
while(toCrawl.size() < jobspeerthread * threads.size() * 2 && crawl && !toknown.isEmpty()) {
LinkedList<String> tocheck = new LinkedList<>();
for(int i = 0; i < toknown.size() && i < maxvideostotest; i++) {
tocheck.add(toknown.removeFirst());
}
toCrawl.addAll(db.checkvideos(tocheck));
}
// listlock.writeLock().unlock();
}
if(toknown.size() < threadcount * jobspeerthread * 20 && crawl) {
currentstate = "restoretemp";
log.info("restoreTemp");
LinkedList<String> rest = db.restoreTemp();
toknown.addAll(rest);
}
if(videoids.size() > 0) {
List<Video> videos = api.getInfos(videoids)[0];
db.addVideos(videos);
}
}
//at the beginning there is maybe just one video to crawl, so keep it calm.
if(startup > 0) {
startup --;
currentstate = "startup sleep";
log.info("startup sleep");
//writing crawlfile
log.info("Writing Crawlfile");
currentstate = "writing crawlfile";
// listlock.writeLock().lock();
try {
Thread.sleep(2000);
} catch(InterruptedException e) {}
PrintWriter p = new PrintWriter(new BufferedWriter(new FileWriter(crawlfile)));
for(String t : toCrawl) {
p.println(t);
}
p.println("-");
for(String t : toknown) {
p.println(t);
}
p.close();
} catch (IOException e) {
log.error("Error writing crawlfile.", e);
}
//get reports
currentstate = "get report";
log.info("get report");
int count = 0;
for (CrawlerThread crawlerThread : threads) {
LinkedList<String>[] report = crawlerThread.report();
crawlcount+= report[0].size();
toSave.addAll(report[0]);
crawlerThread.crawled.clear();
while(report[1].size() > 15) {
LinkedList<String> store = new LinkedList<>();
try {
while(!report[1].isEmpty() && store.size() < 50) {
store.add(report[1].removeFirst());
count++;
}
} catch(NoSuchElementException ignored) {//concurrentmodification fuckery
log.info("no suchelement bla");
}
db.storeTemp(store);
}
log.info(count + " videos added.");
crawlerThread.found.clear();
}
db.deleteDouble();
long runtimes = (System.currentTimeMillis() - start) / 1000;
if(runtimes < 0)
runtimes = 1;
float vidps = (crawlcount / (float) runtimes);//videos per second
Main.getMain().broadcastAdmin(vidps + "v/s " + crawlcount + " total V");
//save to db
currentstate = "save to DB";
log.info("save " + toSave.size() + " videos to DB.");
while(!toSave.isEmpty()) {
LinkedList<String> videoids = new LinkedList<>();
for(int i = 0; i < 50 && !toSave.isEmpty(); i++) {
videoids.add(toSave.remove(0));
}
if(videoids.size() > 0) {
List<Video> videos = api.getInfos(videoids)[0];
db.addVideos(videos);
}
}
//at the beginning there is maybe just one video to crawl, so keep it calm.
if(startup > 0) {
startup --;
currentstate = "startup sleep";
log.info("startup sleep");
try {
Thread.sleep(2000);
} catch(InterruptedException e) {}
}
} catch(Throwable t) {
log.warn("exception in Crawler!", t);
StringBuilder sb = new StringBuilder();
for(StackTraceElement elem : t.getStackTrace()) {
sb.append(elem.getFileName() + "(").append(elem.getMethodName() + ":").append(elem.getLineNumber() + ")\n");
}
Main.getMain().broadcastAdmin("Excpetion in crawler: " + t.toString() + "\n" + sb.toString() );
crawl = false;
Main.getMain().stop();
}
}
@ -270,12 +288,12 @@ public class Crawler implements Runnable {
return new Video();
}
public String printStats() {
public String getStats() {
long runtimes = (System.currentTimeMillis() - start) / 1000;
if(runtimes < 0)
runtimes = 1;
int runtimem = (int) (runtimes / 60);
float vidps = (crawlcount / (float) runtimes);//videos per second
int runtimem = (int) (runtimes / 60);
String out = "";
out += "ToCrawl: " + toCrawl.size();
out += "\nToknown: " + toknown.size();

View File

@ -43,7 +43,7 @@ public class DB {
con.setCatalog(db);
update("CREATE TABLE `videos` (`id` varchar(13) NOT NULL,`length` int(11) NOT NULL,`created` int(11) NOT NULL,`langcode` varchar(3) NOT NULL DEFAULT 'en',`category` int(11) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `id_UNIQUE` (`id`)) ENGINE=InnoDB DEFAULT CHARSET=latin1;");
update("CREATE TABLE `temp` ( `ytid` varchar(13) NOT NULL COMMENT 'a Table to store Video ids, when they are found to process them later', PRIMARY KEY (`ytid`), UNIQUE KEY `ytid_UNIQUE` (`ytid`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;");
log.info("Database is set up!");
}
} catch (SQLException e) {
@ -97,7 +97,8 @@ public class DB {
if(i > 0)
sb.append(',');
Video v = input.get(i);
sb.append("('").append(v.id).append("','").append(v.length).append("','").append(v.created).append("','").append(v.languageCode).append("','").append(v.categorie).append("','").append(v.title).append("','").append(v.channel).append("','").append(v.tags).append("') ");
if(v != null)
sb.append("('").append(v.id).append("','").append(v.length).append("','").append(v.created).append("','").append(v.languageCode).append("','").append(v.categorie).append("','").append(v.title).append("','").append(v.channel).append("','").append(v.tags).append("') ");
}
if(sb.length() > 2) {
String qu = "INSERT IGNORE INTO `ytcrawler`.`videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + sb.toString();
@ -195,18 +196,22 @@ public class DB {
}
/**
 * Reads up to 500 video ids from the {@code temp} table, removes those rows from the table
 * and returns the ids.
 * <p>
 * Both statements use {@code ORDER BY `ytid`}: MySQL gives no ordering guarantee for a bare
 * {@code LIMIT}, so without it the DELETE could remove rows the SELECT never returned and
 * those ids would be lost. (Rows inserted between the two statements can still shift the
 * window; this method does not guard against concurrent writers.)
 *
 * @return the ids that were read, in query order; empty (never {@code null}) when the table
 *         is empty or the lookup fails
 */
public LinkedList<String> restoreTemp() {
    ResultSet res = query("SELECT * FROM `ytcrawler`.`temp` ORDER BY `ytid` LIMIT 500;");
    LinkedList<String> out = new LinkedList<>();
    log.info("RestoreTemp");
    if (res == null) {
        // NOTE(review): assumes query() may hand back null on failure - confirm against query().
        return out;
    }
    // try-with-resources closes the ResultSet even when reading fails half way.
    try (ResultSet rows = res) {
        while (rows.next()) {
            out.add(rows.getString(1));
        }
        // Nothing was read, so there is nothing to remove.
        if (!out.isEmpty()) {
            update("DELETE FROM `ytcrawler`.`temp` ORDER BY `ytid` LIMIT 500;");
        }
    } catch (Exception e) {
        // Still best effort (the ids read so far are returned), but no longer silent.
        log.error("Could not restore video ids from the temp table.", e);
    }
    return out;
}
/**
 * Invokes the stored procedure {@code ytcrawler.deletedouble()} on the database.
 * <p>
 * NOTE(review): judging by its name the procedure removes duplicate entries; its body is
 * not part of this file - confirm against the database schema.
 */
public void deleteDouble() {
    final String statement = "call ytcrawler.deletedouble();";
    update(statement);
}
public void storeTemp(LinkedList<String> strings) {
if(!strings.isEmpty()) {
StringBuilder sb = new StringBuilder();

View File

@ -1,6 +1,7 @@
package de.mrbesen.youtubecrawler;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;
import java.util.Scanner;
@ -16,16 +17,22 @@ import de.mrbesen.telegram.objects.TUser;
public class Main implements CommandHandler, EventListener{
private ArrayList<TUser> admins = new ArrayList<>();
// Telegram usernames that may use the admin commands. Filled from the "admins" file and
// extended when a user sends the currently valid admin string.
private ArrayList<String> admins = new ArrayList<>();//usernames of admins
// String a user has to send to become admin; reset to null once it has been used.
private String adminstr = null;
// Compared against System.currentTimeMillis(): the admin string is only accepted for
// 60 seconds after this value. -1 while no admin string is pending.
// NOTE(review): presumably set together with adminstr - the assignment is not visible here.
private long setadminstr = -1;
// Lowercase Latin alphabet.
// NOTE(review): presumably the character pool for getRandomStr - confirm.
private static String abc = "abcdefghijklmnopqrstuvwxyz";
// Logger named after the runtime class.
private Logger log = Logger.getLogger(this.getClass().getName());
// Telegram API handle: commands and events are registered on it and messages to admins
// are sent through it.
private TelegramAPI tapi;
// Thread that stop() interrupts.
// NOTE(review): presumably the thread executing run() - confirm.
private Thread mainthread;
// Application instance created in main(String[]) and exposed through getMain().
private static Main main;
/**
 * Gives other classes access to the application instance.
 *
 * @return the instance held in the static {@code main} field; {@code null} as long as
 *         nothing has been assigned to it
 */
public static Main getMain() {
    return Main.main;
}
/**
 * Program entry point: creates the single {@link Main} instance, publishes it through the
 * static {@code main} field and then runs it.
 * <p>
 * The instance is stored before {@code run()} is called so that {@link #getMain()} already
 * returns it while the application is running. Exactly one instance is started.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
    main = new Main();
    main.run();
}
private Crawler cra;
@ -53,8 +60,23 @@ public class Main implements CommandHandler, EventListener{
tapi.getCommandManager().registerCommand("stop", this);
tapi.getEventManager().registerEvent(this);
tapi.setHelpText("Send the command /random to get a random video.");
tapi.setUpdateInterval(25000);
tapi.start();
//load admins
Log.l.info("Loading admins.");
try {
Scanner scan = new Scanner(new File("admins"));
while(scan.hasNext()) {
String line = scan.nextLine().trim();
if(!line.isEmpty()) {
admins.add(line);
}
}
scan.close();
} catch (IOException | NumberFormatException | ArrayIndexOutOfBoundsException e) {
e.printStackTrace();
}
//CLI
Scanner s = new Scanner(System.in);
String in;
@ -71,7 +93,7 @@ public class Main implements CommandHandler, EventListener{
}
} else if(in.equalsIgnoreCase("stats")) {
log.info("Getting Stats");
for(String line : cra.printStats().split("\n")) {
for(String line : cra.getStats().split("\n")) {
log.info(line);
}
}
@ -80,7 +102,8 @@ public class Main implements CommandHandler, EventListener{
log.info("Terminated.");
}
private void stop() {
public void stop() {
log.info("Stop.");
tapi.stop();
cra.stop();
mainthread.interrupt();
@ -93,7 +116,7 @@ public class Main implements CommandHandler, EventListener{
sender.sendMessage("https://youtube.com/watch?v=" + ytid);
return true;
} else if(cmd.equals("admin")) {
if(admins.contains(sender)) {
if(admins.contains(sender.getName())) {
sender.sendMessage("You are admin.");
return true;
} else {
@ -102,12 +125,12 @@ public class Main implements CommandHandler, EventListener{
log.info("Adminstr: " + adminstr);
}
} else if(cmd.equals("stats")) {
if(admins.contains(sender)) {
sender.sendMessage(cra.printStats());
if(admins.contains(sender.getName())) {
sender.sendMessage(cra.getStats());
return true;
}
} else if(cmd.equals("stop")) {
if(admins.contains(sender)) {
if(admins.contains(sender.getName())) {
stop();
sender.sendMessage("Stop.");
log.info("Stopped via Telegram by " + sender.getFirstName());
@ -117,6 +140,12 @@ public class Main implements CommandHandler, EventListener{
return false;
}
/**
 * Sends a message to every registered admin.
 * <p>
 * Delivery is best effort per recipient: a failure for one admin is logged and the
 * remaining admins are still notified. This matters because the crawler's error handler
 * calls this method before it shuts the application down; an exception escaping from here
 * would skip that shutdown.
 *
 * @param msg the text to send to each admin
 */
public void broadcastAdmin(String msg) {
    for(String admin : admins) {
        try {
            tapi.getUser(admin).sendMessage(msg);
        } catch(RuntimeException e) {
            // NOTE(review): also meant to cover a username the API cannot resolve, assuming
            // getUser() then returns null or throws - confirm against TelegramAPI.
            log.warn("Could not send broadcast to admin " + admin, e);
        }
    }
}
private String getRandomStr(int length) {
Random rand = new Random();
String out = "";
@ -132,11 +161,11 @@ public class Main implements CommandHandler, EventListener{
if(e.getMessage() != null && (System.currentTimeMillis() - setadminstr) / 1000 < 60) {
if(e.getMessage().getText() != null) {
if(e.getMessage().getText().equals(adminstr)) {
admins.add(e.getUser());
admins.add(e.getUser().getName());
e.getMessage().reply("You are now Admin!");
adminstr = null;
setadminstr = -1;
log.info(e.getUser().getFirstName() + " is now Admin!");
log.info(e.getUser().getID() + " " + e.getUser().getName() + " " + e.getUser().getFirstName() + " is now Admin!");
}
}
}