343 lines
9.8 KiB
Java
343 lines
9.8 KiB
Java
package de.mrbesen.youtubecrawler;
|
|
|
|
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;

import org.apache.log4j.Logger;

import com.mysql.cj.jdbc.exceptions.MysqlDataTruncation;

import de.mrbesen.youtubecrawler.Crawler.Video;
|
|
|
|
public class DB implements Runnable {
|
|
|
|
private Connection con;
|
|
private String server = Config.prop.getProperty("db.host", "localhost"), user = Config.prop.getProperty("db.user", "ytcrawler"), pw = Config.prop.getProperty("db.pw", ""), db = Config.prop.getProperty("db.dbname", "ytcrawler");
|
|
private int port = Integer.parseInt(Config.prop.getProperty("db.port", "3306"));
|
|
private Logger log = Logger.getLogger(DB.class.getName());
|
|
private ArrayList<String> randombuffer = new ArrayList<>(100);
|
|
private Random rand = new Random();
|
|
private Thread randomrefill = null;
|
|
private int dbsize = 0;
|
|
|
|
private StringBuilder tostorebuffer ;
|
|
private int writebuffersize = 500;
|
|
private int writebuffercurrentsize = 0;
|
|
|
|
private StringBuilder totempbuffer;
|
|
private int writetempbuffercurrentsize = 0;
|
|
|
|
/**
 * Allocates the write buffers, connects to the MySQL server and makes sure the
 * configured database exists (creating it together with its tables when it is
 * missing). Afterwards the first refill of the random buffer is started and the
 * current size of the `videos` table is read.
 *
 * Connection problems are logged and not rethrown; the instance then has no
 * usable connection.
 */
public DB() {
    // Read the buffer size and allocate the buffers first: they do not depend on the
    // database and must exist even when the connection attempt below fails.
    String configuredSize = Config.prop.getProperty("db.writebuffersize");
    try {
        int parsed = Integer.parseInt(configuredSize);
        if (parsed >= 0) {
            writebuffersize = parsed;
        } else {
            // a negative capacity would make the StringBuilder constructor throw
            log.warn("could not read the number \"" + configuredSize + "\" from the config file. db.writebuffersize");
        }
    } catch (NumberFormatException e) {
        log.warn("could not read the number \"" + configuredSize + "\" from the config file. db.writebuffersize");
    }
    tostorebuffer = new StringBuilder(writebuffersize);
    totempbuffer = new StringBuilder(writebuffersize);

    try {
        connect(false);
        if (con == null) {
            // connect() already logged the cause
            log.error("No database connection available, skipping the database setup.");
            return;
        }

        // does the database exist?
        boolean found = false;
        try (ResultSet catalogs = con.getMetaData().getCatalogs()) {
            while (catalogs.next()) {
                if (catalogs.getString(1).equalsIgnoreCase(db)) {
                    found = true;
                    break;
                }
            }
        }

        if (!found) {
            log.warn("Database not found! tring to create!");
            update("CREATE DATABASE `" + db + "` /*!40100 DEFAULT CHARACTER SET latin1*/;");
        }
        con.setCatalog(db);

        if (!found) {
            // The columns `videotitle`, `channel` and `tags` are used by the INSERT and UPDATE
            // statements of this class, so a freshly created table needs them as well.
            // NOTE(review): their types are derived from the SUBSTR limits in updateVideo()
            // (100 / 20 characters, tags unlimited) - confirm against the production schema.
            update("CREATE TABLE `videos` (`id` varchar(13) NOT NULL,`length` int(11) NOT NULL,`created` int(11) NOT NULL,`langcode` varchar(3) NOT NULL DEFAULT 'en',`category` int(11) DEFAULT NULL,`videotitle` varchar(100) DEFAULT NULL,`channel` varchar(20) DEFAULT NULL,`tags` text, PRIMARY KEY (`id`), UNIQUE KEY `id_UNIQUE` (`id`)) ENGINE=InnoDB DEFAULT CHARSET=latin1;");
            update("CREATE TABLE `temp` ( `ytid` varchar(13) NOT NULL COMMENT 'a Table to store Video ids, when they are found to process them later', PRIMARY KEY (`ytid`), UNIQUE KEY `ytid_UNIQUE` (`ytid`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;");
            log.info("Database is set up!");
        }

        refillbuffer();

        // get db size
        dbsize();
    } catch (SQLException e) {
        log.error("Error while connecting to the database! ", e);
    }
}
|
|
|
|
private void dbsize() {
|
|
try {
|
|
ResultSet set = query("SELECT count(*) as count FROM `videos`;");
|
|
if(set != null) {
|
|
if(set.next()) {
|
|
dbsize = set.getInt(1);
|
|
}
|
|
}
|
|
} catch(SQLException e) {
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
|
|
public int getDBSize() {
|
|
return dbsize;
|
|
}
|
|
|
|
private void connect(boolean selectdb) {
|
|
try {
|
|
//verbinden
|
|
con = DriverManager.getConnection("jdbc:mysql://" + server + ":" + port + "/" + (selectdb ? db : "") + "?serverTimezone=UTC&verifyServerCertificate=false&useSSL=true&useUnicode=true&characterEncoding=utf-8", user, pw);
|
|
}catch (SQLException e) {
|
|
log.error("Error while connecting to the database! ", e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* removes all videos, that are known from the db
|
|
* @param input
|
|
* @return
|
|
*/
|
|
public List<String> checkvideos(List<String> input) {
|
|
if(!input.isEmpty()) {
|
|
StringBuilder ids = new StringBuilder();
|
|
for(int i = 0; i < input.size(); i++) {
|
|
ids.append(',').append(input.get(i));
|
|
}
|
|
String query = "SELECT `id` FROM `videos` WHERE concat('%',`id`,'%') LIKE '" + ids.toString() + "';";
|
|
ResultSet res = query(query);
|
|
try {
|
|
while(res.next()) {
|
|
input.remove(res.getString(1));
|
|
}
|
|
} catch(SQLException e) {
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
return input;
|
|
}
|
|
|
|
/**
|
|
* save the list of videos to the DB
|
|
* @param input
|
|
*/
|
|
public void addVideos(ArrayList<Video> input, boolean force) {
|
|
if(input != null) {
|
|
if(input.size() > 0) {
|
|
writebuffercurrentsize += input.size();
|
|
for(int i = 0; i < input.size(); i++) {
|
|
Video v = input.get(i);
|
|
if(v != null)
|
|
tostorebuffer.append(",('").append(escape(v.id)).append("',").append(v.length).append(",").append(v.created).append(",'").append(escape(v.languageCode)).append("',").append(v.categorie).append(",'").append(escape(v.title)).append("','").append(escape(v.channel)).append("','").append(escape(v.tags)).append("') ");
|
|
}
|
|
}
|
|
}
|
|
if(writebuffercurrentsize > writebuffersize || force) {
|
|
if(tostorebuffer.length() > 10) {
|
|
log.info("Write databuffer to DB video count: " + writebuffercurrentsize);
|
|
dbsize += writebuffercurrentsize;
|
|
tostorebuffer.deleteCharAt(0);//delete leading ','
|
|
String qu = "INSERT IGNORE INTO `videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + tostorebuffer.toString();
|
|
update(qu);
|
|
|
|
//reset buffer
|
|
writebuffercurrentsize = 0;
|
|
tostorebuffer = new StringBuilder(writebuffersize);
|
|
}
|
|
}
|
|
}
|
|
|
|
private String escape(String e) {
|
|
return e.replace("'", "\\'");
|
|
}
|
|
|
|
public void updateVideos(List<Video> input) {
|
|
log.info("Updateing " + input.size() + " videos.");
|
|
for(Video v : input) {
|
|
if(v != null)
|
|
updateVideo(v);
|
|
}
|
|
}
|
|
|
|
private void updateVideo(Video v) {
|
|
try {
|
|
String qu = "UPDATE `videos` SET `length` = " + v.length + ", `created` = " + v.created + ", `langcode` = SUBSTR('" + v.languageCode + "', 1, 3) ,`category` = " + v.categorie + ",`videotitle` = SUBSTR('" + escape(v.title) + "',1,100),`channel` = SUBSTR('" + escape(v.channel) + "',1,20),`tags` = '" + escape(v.tags) + "' WHERE `id` = '" + escape(v.id) + "';";
|
|
update(qu);
|
|
} catch(NullPointerException e) {
|
|
|
|
}
|
|
}
|
|
|
|
public LinkedList<String> getUncompleted(int limit, int offset) {
|
|
LinkedList<String> out = new LinkedList<>();
|
|
String sql = "SELECT `id` FROM `videos` WHERE `channel` IS NULL LIMIT " + offset + "," + limit + ";";
|
|
ResultSet resu = query(sql);
|
|
try {
|
|
while(resu.next()) {
|
|
out.add(resu.getString(1));
|
|
}
|
|
} catch (SQLException e) {
|
|
log.info("error", e);
|
|
}
|
|
return out;
|
|
}
|
|
|
|
public void removeVideos(LinkedList<Video> vids) {
|
|
log.info("Delete " + vids.size() + " videos.");
|
|
for(Video s : vids) {
|
|
update("DELETE FROM `videos` WHERE `id`='" + escape(s.id) + "';");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* instant query
|
|
* @param q
|
|
* @return Das resultSet der Query
|
|
*/
|
|
|
|
public ResultSet query(String q) {
|
|
try {
|
|
if(con.isClosed()) {
|
|
connect(true);
|
|
}
|
|
return con.prepareStatement(q).executeQuery();
|
|
} catch (SQLException e) {
|
|
log.error("Fehler bim ausführen der Query: " + q, e);
|
|
}
|
|
return null;//ERROR!
|
|
}
|
|
|
|
/**
|
|
* instant update
|
|
* @param q
|
|
*/
|
|
public void update(String q) {
|
|
try {
|
|
if(con.isClosed()) {
|
|
connect(true);
|
|
}
|
|
con.prepareStatement(q).executeUpdate();
|
|
} catch (MysqlDataTruncation ignore) {
|
|
log.info("truncated.", ignore);
|
|
} catch (SQLException e) {
|
|
log.error("Fehler bim ausführen der Update-Query: " + q, e);
|
|
}
|
|
}
|
|
|
|
private void refillbuffer() {
|
|
if(randomrefill == null) {
|
|
randomrefill = new Thread(this, "Randomrefill");
|
|
randomrefill.start();
|
|
}
|
|
}
|
|
|
|
public String getRandom() {
|
|
log.info("Get random Video");
|
|
if(randombuffer.size() < 10 ) {
|
|
refillbuffer();
|
|
}
|
|
if(randombuffer.isEmpty()) {
|
|
log.warn("randombuffer is empty!");
|
|
return null;
|
|
}
|
|
return randombuffer.remove(0);
|
|
}
|
|
|
|
public int getRandomCount() {
|
|
return randombuffer.size();
|
|
}
|
|
|
|
public LinkedList<String> restoreTemp() {
|
|
ResultSet res = query("SELECT * FROM `temp` LIMIT 500;");
|
|
LinkedList<String> out = new LinkedList<>();
|
|
log.info("RestoreTemp");
|
|
try {
|
|
while(res.next()) {
|
|
out.add(res.getString(1));
|
|
}
|
|
update("DELETE FROM `temp` LIMIT 500;");
|
|
} catch (Exception e) {}
|
|
return out;
|
|
}
|
|
|
|
public void deleteDouble() {
|
|
log.info("Started Delete Double");
|
|
long start = System.currentTimeMillis();
|
|
update("CALL deletedouble();");
|
|
log.info("Delete Double done in " + ((System.currentTimeMillis() - start)/60000) + " min");
|
|
}
|
|
|
|
public void storeTemp(ArrayList<String> strings, boolean force) {
|
|
if(strings == null)
|
|
return;
|
|
if(!strings.isEmpty()) {
|
|
log.info("store Temp to buffer: " + strings.size());
|
|
writetempbuffercurrentsize += strings.size();
|
|
for(String s : strings) {
|
|
totempbuffer.append(", ('").append(escape(s)).append("')");
|
|
}
|
|
}
|
|
if(writetempbuffercurrentsize > writebuffersize || force) {
|
|
log.info("Write Buffer: " + writetempbuffercurrentsize);
|
|
totempbuffer.deleteCharAt(0);//delete leading ','
|
|
String qu = "INSERT IGNORE INTO `temp` (`ytid`) VALUES " + totempbuffer.toString() + ";";
|
|
update(qu);
|
|
|
|
//reset
|
|
writetempbuffercurrentsize = 0;
|
|
totempbuffer = new StringBuilder(writebuffersize);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Stops the randomnes-Server and disconnect
|
|
*/
|
|
public void stop() {
|
|
try {
|
|
if(con != null) {
|
|
if(!con.isClosed()) {
|
|
addVideos(null, true);
|
|
con.commit();
|
|
con.close();
|
|
}
|
|
}
|
|
} catch (SQLException e) {
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* runable, of Thread for randomrefill
|
|
*/
|
|
@Override
|
|
public void run() {
|
|
log.info("Started Refilling.");
|
|
try {
|
|
ResultSet set = query("SELECT `id` FROM `videos` ORDER BY rand() LIMIT 100;");
|
|
if(set != null) {
|
|
while(set.next()) {
|
|
randombuffer.add(set.getString(1));
|
|
}
|
|
log.info("refilled randombuffer to " + randombuffer.size() + " videos.");
|
|
}
|
|
} catch (SQLException e) {
|
|
log.warn("error getting a random video", e);
|
|
}
|
|
|
|
if(randombuffer.isEmpty()) {
|
|
log.error("Unable to retrieve RandomVideos");
|
|
}
|
|
randomrefill = null;
|
|
}
|
|
} |