forked from MrBesen/YoutubeCrawler
kleine bugfixes, admins file
This commit is contained in:
parent
4917369b34
commit
b67952ce88
|
@ -137,119 +137,137 @@ public class Crawler implements Runnable {
|
|||
}
|
||||
while(crawl) {
|
||||
log.info("to Crawl: " + toCrawl.size() + " known: " + toknown.size() + " Time: " + dateform.format(new Date()));
|
||||
|
||||
//fullfill request
|
||||
while(!requested.isEmpty() && !toCrawl.isEmpty() && crawl) {
|
||||
log.info("fullfill request");
|
||||
currentstate = "fullfill requests";
|
||||
send(requested.remove(0));
|
||||
}
|
||||
|
||||
//kindof idle
|
||||
while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) {
|
||||
startup = 0;//stop startup count
|
||||
currentstate = "idle";
|
||||
Thread.yield();
|
||||
try {
|
||||
Thread.sleep(100);
|
||||
} catch(InterruptedException ignored) {
|
||||
break;
|
||||
}
|
||||
// updateDB();
|
||||
}
|
||||
//nothing left?
|
||||
if(toknown.isEmpty() && toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon
|
||||
log.warn("nothing left to crawl");
|
||||
crawl = false;
|
||||
}
|
||||
|
||||
//refil the tocrawl list.
|
||||
if(!toknown.isEmpty()) {
|
||||
//check in db for known videos
|
||||
log.info("Checking the DB");
|
||||
currentstate = "get new tocrawl";
|
||||
// listlock.writeLock().lock();
|
||||
while(toCrawl.size() < jobspeerthread * threads.size() * 2 && crawl && !toknown.isEmpty()) {
|
||||
LinkedList<String> tocheck = new LinkedList<>();
|
||||
for(int i = 0; i < toknown.size() && i < maxvideostotest; i++) {
|
||||
tocheck.add(toknown.removeFirst());
|
||||
}
|
||||
toCrawl.addAll(db.checkvideos(tocheck));
|
||||
}
|
||||
// listlock.writeLock().unlock();
|
||||
}
|
||||
if(toknown.size() < threadcount * jobspeerthread * 20 && crawl) {
|
||||
currentstate = "restoretemp";
|
||||
log.info("restoreTemp");
|
||||
LinkedList<String> rest = db.restoreTemp();
|
||||
toknown.addAll(rest);
|
||||
}
|
||||
|
||||
//writing crawlfile
|
||||
log.info("Writing Crawlfile");
|
||||
currentstate = "writing crawlfile";
|
||||
// listlock.writeLock().lock();
|
||||
try {
|
||||
PrintWriter p = new PrintWriter(new BufferedWriter(new FileWriter(crawlfile)));
|
||||
for(String t : toCrawl) {
|
||||
p.println(t);
|
||||
//fullfill request
|
||||
while(!requested.isEmpty() && !toCrawl.isEmpty() && crawl) {
|
||||
log.info("fullfill request");
|
||||
currentstate = "fullfill requests";
|
||||
send(requested.remove(0));
|
||||
}
|
||||
p.println("-");
|
||||
for(String t : toknown) {
|
||||
p.println(t);
|
||||
}
|
||||
p.close();
|
||||
} catch (IOException e) {
|
||||
log.error("Error writing crawlfile.", e);
|
||||
}
|
||||
|
||||
//get reports
|
||||
currentstate = "get report";
|
||||
log.info("get report");
|
||||
int count = 0;
|
||||
for (CrawlerThread crawlerThread : threads) {
|
||||
LinkedList<String>[] report = crawlerThread.report();
|
||||
crawlcount+= report[0].size();
|
||||
toSave.addAll(report[0]);
|
||||
crawlerThread.crawled.clear();
|
||||
while(report[1].size() > 0) {
|
||||
LinkedList<String> store = new LinkedList<>();
|
||||
//kindof idle
|
||||
while(toCrawl.size() > (jobspeerthread * threads.size()) && crawl && requested.isEmpty()) {
|
||||
startup = 0;//stop startup count
|
||||
currentstate = "idle";
|
||||
Thread.yield();
|
||||
try {
|
||||
while(!report[1].isEmpty() && store.size() < 50) {
|
||||
store.add(report[1].removeFirst());
|
||||
|
||||
count++;
|
||||
}
|
||||
} catch(NoSuchElementException ignored) {//concurrentmodification fuckery
|
||||
Thread.sleep(100);
|
||||
} catch(InterruptedException ignored) {
|
||||
break;
|
||||
}
|
||||
db.storeTemp(store);
|
||||
// updateDB();
|
||||
}
|
||||
//nothing left?
|
||||
if(toknown.isEmpty() && toCrawl.isEmpty() && requested.size() == threads.size()) {//very uncommon
|
||||
log.warn("nothing left to crawl");
|
||||
crawl = false;
|
||||
}
|
||||
log.info(count + " videos added.");
|
||||
crawlerThread.found.clear();
|
||||
}
|
||||
|
||||
//save to db
|
||||
currentstate = "save to DB";
|
||||
log.info("save " + toSave.size() + " videos to DB.");
|
||||
while(!toSave.isEmpty()) {
|
||||
LinkedList<String> videoids = new LinkedList<>();
|
||||
for(int i = 0; i < 50 && !toSave.isEmpty(); i++) {
|
||||
videoids.add(toSave.remove(0));
|
||||
//refil the tocrawl list.
|
||||
if(!toknown.isEmpty()) {
|
||||
//check in db for known videos
|
||||
log.info("Checking the DB");
|
||||
currentstate = "get new tocrawl";
|
||||
// listlock.writeLock().lock();
|
||||
while(toCrawl.size() < jobspeerthread * threads.size() * 2 && crawl && !toknown.isEmpty()) {
|
||||
LinkedList<String> tocheck = new LinkedList<>();
|
||||
for(int i = 0; i < toknown.size() && i < maxvideostotest; i++) {
|
||||
tocheck.add(toknown.removeFirst());
|
||||
}
|
||||
toCrawl.addAll(db.checkvideos(tocheck));
|
||||
}
|
||||
// listlock.writeLock().unlock();
|
||||
}
|
||||
if(toknown.size() < threadcount * jobspeerthread * 20 && crawl) {
|
||||
currentstate = "restoretemp";
|
||||
log.info("restoreTemp");
|
||||
LinkedList<String> rest = db.restoreTemp();
|
||||
toknown.addAll(rest);
|
||||
}
|
||||
if(videoids.size() > 0) {
|
||||
List<Video> videos = api.getInfos(videoids)[0];
|
||||
db.addVideos(videos);
|
||||
}
|
||||
}
|
||||
|
||||
//at the beginning there is maybe just one video to crawl, so keep it calm.
|
||||
if(startup > 0) {
|
||||
startup --;
|
||||
currentstate = "startup sleep";
|
||||
log.info("startup sleep");
|
||||
//writing crawlfile
|
||||
log.info("Writing Crawlfile");
|
||||
currentstate = "writing crawlfile";
|
||||
// listlock.writeLock().lock();
|
||||
try {
|
||||
Thread.sleep(2000);
|
||||
} catch(InterruptedException e) {}
|
||||
PrintWriter p = new PrintWriter(new BufferedWriter(new FileWriter(crawlfile)));
|
||||
for(String t : toCrawl) {
|
||||
p.println(t);
|
||||
}
|
||||
p.println("-");
|
||||
for(String t : toknown) {
|
||||
p.println(t);
|
||||
}
|
||||
p.close();
|
||||
} catch (IOException e) {
|
||||
log.error("Error writing crawlfile.", e);
|
||||
}
|
||||
|
||||
//get reports
|
||||
currentstate = "get report";
|
||||
log.info("get report");
|
||||
int count = 0;
|
||||
for (CrawlerThread crawlerThread : threads) {
|
||||
LinkedList<String>[] report = crawlerThread.report();
|
||||
crawlcount+= report[0].size();
|
||||
toSave.addAll(report[0]);
|
||||
crawlerThread.crawled.clear();
|
||||
while(report[1].size() > 15) {
|
||||
LinkedList<String> store = new LinkedList<>();
|
||||
try {
|
||||
while(!report[1].isEmpty() && store.size() < 50) {
|
||||
store.add(report[1].removeFirst());
|
||||
|
||||
count++;
|
||||
}
|
||||
} catch(NoSuchElementException ignored) {//concurrentmodification fuckery
|
||||
log.info("no suchelement bla");
|
||||
}
|
||||
db.storeTemp(store);
|
||||
}
|
||||
log.info(count + " videos added.");
|
||||
crawlerThread.found.clear();
|
||||
}
|
||||
db.deleteDouble();
|
||||
|
||||
long runtimes = (System.currentTimeMillis() - start) / 1000;
|
||||
if(runtimes < 0)
|
||||
runtimes = 1;
|
||||
float vidps = (crawlcount / (float) runtimes);//videos per second
|
||||
Main.getMain().broadcastAdmin(vidps + "v/s " + crawlcount + " total V");
|
||||
|
||||
//save to db
|
||||
currentstate = "save to DB";
|
||||
log.info("save " + toSave.size() + " videos to DB.");
|
||||
while(!toSave.isEmpty()) {
|
||||
LinkedList<String> videoids = new LinkedList<>();
|
||||
for(int i = 0; i < 50 && !toSave.isEmpty(); i++) {
|
||||
videoids.add(toSave.remove(0));
|
||||
}
|
||||
if(videoids.size() > 0) {
|
||||
List<Video> videos = api.getInfos(videoids)[0];
|
||||
db.addVideos(videos);
|
||||
}
|
||||
}
|
||||
|
||||
//at the beginning there is maybe just one video to crawl, so keep it calm.
|
||||
if(startup > 0) {
|
||||
startup --;
|
||||
currentstate = "startup sleep";
|
||||
log.info("startup sleep");
|
||||
try {
|
||||
Thread.sleep(2000);
|
||||
} catch(InterruptedException e) {}
|
||||
}
|
||||
} catch(Throwable t) {
|
||||
log.warn("exception in Crawler!", t);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for(StackTraceElement elem : t.getStackTrace()) {
|
||||
sb.append(elem.getFileName() + "(").append(elem.getMethodName() + ":").append(elem.getLineNumber() + ")\n");
|
||||
}
|
||||
Main.getMain().broadcastAdmin("Excpetion in crawler: " + t.toString() + "\n" + sb.toString() );
|
||||
crawl = false;
|
||||
Main.getMain().stop();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -270,12 +288,12 @@ public class Crawler implements Runnable {
|
|||
return new Video();
|
||||
}
|
||||
|
||||
public String printStats() {
|
||||
public String getStats() {
|
||||
long runtimes = (System.currentTimeMillis() - start) / 1000;
|
||||
if(runtimes < 0)
|
||||
runtimes = 1;
|
||||
int runtimem = (int) (runtimes / 60);
|
||||
float vidps = (crawlcount / (float) runtimes);//videos per second
|
||||
int runtimem = (int) (runtimes / 60);
|
||||
String out = "";
|
||||
out += "ToCrawl: " + toCrawl.size();
|
||||
out += "\nToknown: " + toknown.size();
|
||||
|
|
|
@ -43,7 +43,7 @@ public class DB {
|
|||
con.setCatalog(db);
|
||||
update("CREATE TABLE `videos` (`id` varchar(13) NOT NULL,`length` int(11) NOT NULL,`created` int(11) NOT NULL,`langcode` varchar(3) NOT NULL DEFAULT 'en',`category` int(11) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `id_UNIQUE` (`id`)) ENGINE=InnoDB DEFAULT CHARSET=latin1;");
|
||||
update("CREATE TABLE `temp` ( `ytid` varchar(13) NOT NULL COMMENT 'a Table to store Video ids, when they are found to process them later', PRIMARY KEY (`ytid`), UNIQUE KEY `ytid_UNIQUE` (`ytid`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;");
|
||||
|
||||
|
||||
log.info("Database is set up!");
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
|
@ -97,7 +97,8 @@ public class DB {
|
|||
if(i > 0)
|
||||
sb.append(',');
|
||||
Video v = input.get(i);
|
||||
sb.append("('").append(v.id).append("','").append(v.length).append("','").append(v.created).append("','").append(v.languageCode).append("','").append(v.categorie).append("','").append(v.title).append("','").append(v.channel).append("','").append(v.tags).append("') ");
|
||||
if(v != null)
|
||||
sb.append("('").append(v.id).append("','").append(v.length).append("','").append(v.created).append("','").append(v.languageCode).append("','").append(v.categorie).append("','").append(v.title).append("','").append(v.channel).append("','").append(v.tags).append("') ");
|
||||
}
|
||||
if(sb.length() > 2) {
|
||||
String qu = "INSERT IGNORE INTO `ytcrawler`.`videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + sb.toString();
|
||||
|
@ -195,18 +196,22 @@ public class DB {
|
|||
}
|
||||
|
||||
public LinkedList<String> restoreTemp() {
|
||||
ResultSet res = query("SELECT * FROM `ytcrawler`.`temp` LIMIT 0,500;");
|
||||
ResultSet res = query("SELECT * FROM `ytcrawler`.`temp` LIMIT 500;");
|
||||
LinkedList<String> out = new LinkedList<>();
|
||||
log.info("RestoreTemp");
|
||||
try {
|
||||
while(res.next()) {
|
||||
out.add(res.getString(1));
|
||||
}
|
||||
update("DELETE FROM `ytcrawler`.`temp` LIMIT 0,500;");
|
||||
update("DELETE FROM `ytcrawler`.`temp` LIMIT 500;");
|
||||
} catch (Exception e) {}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * Passes the statement {@code call ytcrawler.deletedouble();} to {@code update}.
 * <p>
 * NOTE(review): the stored procedure itself is not defined in this file;
 * presumably it removes duplicate ids - confirm against the database schema.
 */
public void deleteDouble() {
|
||||
update("call ytcrawler.deletedouble();");
|
||||
}
|
||||
|
||||
public void storeTemp(LinkedList<String> strings) {
|
||||
if(!strings.isEmpty()) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package de.mrbesen.youtubecrawler;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Random;
|
||||
import java.util.Scanner;
|
||||
|
@ -16,16 +17,22 @@ import de.mrbesen.telegram.objects.TUser;
|
|||
|
||||
public class Main implements CommandHandler, EventListener{
|
||||
|
||||
private ArrayList<TUser> admins = new ArrayList<>();
|
||||
private ArrayList<String> admins = new ArrayList<>();//usernames of admins
|
||||
private String adminstr = null;
|
||||
private long setadminstr = -1;
|
||||
private static String abc = "abcdefghijklmnopqrstuvwxyz";
|
||||
private Logger log = Logger.getLogger(this.getClass().getName());
|
||||
private TelegramAPI tapi;
|
||||
private Thread mainthread;
|
||||
private static Main main;
|
||||
|
||||
/**
 * Returns the static {@code main} field.
 *
 * @return the stored {@code Main} instance; the field has no initializer, so
 *         this is {@code null} until something assigns it
 */
public static Main getMain() {
|
||||
return main;
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
new Main().run();
|
||||
main = new Main();
|
||||
main.run();
|
||||
}
|
||||
|
||||
private Crawler cra;
|
||||
|
@ -53,8 +60,23 @@ public class Main implements CommandHandler, EventListener{
|
|||
tapi.getCommandManager().registerCommand("stop", this);
|
||||
tapi.getEventManager().registerEvent(this);
|
||||
tapi.setHelpText("Send the command /random to get a random video.");
|
||||
tapi.setUpdateInterval(25000);
|
||||
tapi.start();
|
||||
|
||||
//load admins
|
||||
Log.l.info("Loading admins.");
|
||||
try {
|
||||
Scanner scan = new Scanner(new File("admins"));
|
||||
while(scan.hasNext()) {
|
||||
String line = scan.nextLine().trim();
|
||||
if(!line.isEmpty()) {
|
||||
admins.add(line);
|
||||
}
|
||||
}
|
||||
scan.close();
|
||||
} catch (IOException | NumberFormatException | ArrayIndexOutOfBoundsException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
//CLI
|
||||
Scanner s = new Scanner(System.in);
|
||||
String in;
|
||||
|
@ -71,7 +93,7 @@ public class Main implements CommandHandler, EventListener{
|
|||
}
|
||||
} else if(in.equalsIgnoreCase("stats")) {
|
||||
log.info("Getting Stats");
|
||||
for(String line : cra.printStats().split("\n")) {
|
||||
for(String line : cra.getStats().split("\n")) {
|
||||
log.info(line);
|
||||
}
|
||||
}
|
||||
|
@ -80,7 +102,8 @@ public class Main implements CommandHandler, EventListener{
|
|||
log.info("Terminated.");
|
||||
}
|
||||
|
||||
private void stop() {
|
||||
public void stop() {
|
||||
log.info("Stop.");
|
||||
tapi.stop();
|
||||
cra.stop();
|
||||
mainthread.interrupt();
|
||||
|
@ -93,7 +116,7 @@ public class Main implements CommandHandler, EventListener{
|
|||
sender.sendMessage("https://youtube.com/watch?v=" + ytid);
|
||||
return true;
|
||||
} else if(cmd.equals("admin")) {
|
||||
if(admins.contains(sender)) {
|
||||
if(admins.contains(sender.getName())) {
|
||||
sender.sendMessage("You are admin.");
|
||||
return true;
|
||||
} else {
|
||||
|
@ -102,12 +125,12 @@ public class Main implements CommandHandler, EventListener{
|
|||
log.info("Adminstr: " + adminstr);
|
||||
}
|
||||
} else if(cmd.equals("stats")) {
|
||||
if(admins.contains(sender)) {
|
||||
sender.sendMessage(cra.printStats());
|
||||
if(admins.contains(sender.getName())) {
|
||||
sender.sendMessage(cra.getStats());
|
||||
return true;
|
||||
}
|
||||
} else if(cmd.equals("stop")) {
|
||||
if(admins.contains(sender)) {
|
||||
if(admins.contains(sender.getName())) {
|
||||
stop();
|
||||
sender.sendMessage("Stop.");
|
||||
log.info("Stopped via Telegram by " + sender.getFirstName());
|
||||
|
@ -117,6 +140,12 @@ public class Main implements CommandHandler, EventListener{
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
 * Sends {@code msg} to every entry of {@code admins}.
 * <p>
 * Each entry is passed to {@code tapi.getUser} and {@code sendMessage} is
 * called on the result.
 * <p>
 * NOTE(review): the result of {@code getUser} is used without a null check -
 * confirm it cannot be null for names that were only read from the admins file.
 * NOTE(review): {@code admins} is a plain {@code ArrayList}; verify that it is
 * not modified by another thread while this loop iterates.
 *
 * @param msg the text to send to each admin
 */
public void broadcastAdmin(String msg) {
|
||||
for(String admin : admins) {
|
||||
tapi.getUser(admin).sendMessage(msg);
|
||||
}
|
||||
}
|
||||
|
||||
private String getRandomStr(int length) {
|
||||
Random rand = new Random();
|
||||
String out = "";
|
||||
|
@ -132,11 +161,11 @@ public class Main implements CommandHandler, EventListener{
|
|||
if(e.getMessage() != null && (System.currentTimeMillis() - setadminstr) / 1000 < 60) {
|
||||
if(e.getMessage().getText() != null) {
|
||||
if(e.getMessage().getText().equals(adminstr)) {
|
||||
admins.add(e.getUser());
|
||||
admins.add(e.getUser().getName());
|
||||
e.getMessage().reply("You are now Admin!");
|
||||
adminstr = null;
|
||||
setadminstr = -1;
|
||||
log.info(e.getUser().getFirstName() + " is now Admin!");
|
||||
log.info(e.getUser().getID() + " " + e.getUser().getName() + " " + e.getUser().getFirstName() + " is now Admin!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue