YoutubeCrawler/src/main/de/mrbesen/youtubecrawler/DB.java

345 lines
10 KiB
Java

package de.mrbesen.youtubecrawler;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.log4j.Logger;
import com.mysql.cj.jdbc.exceptions.MysqlDataTruncation;
import de.mrbesen.youtubecrawler.Crawler.Video;
/**
 * Database access for the crawler: connects to MySQL, creates the schema when the
 * configured database does not exist, buffers inserts into the {@code videos} and
 * {@code temp} tables, and keeps a small buffer of random video ids that is refilled
 * by a background thread (this class is the {@link Runnable} of that thread).
 *
 * <p>Only the random-id buffer is synchronized. The write buffers used by
 * {@link #addVideos} and {@link #storeTemp} are not; NOTE(review): confirm that they are
 * only ever called from a single thread.
 */
public class DB implements Runnable {

    private static final Logger log = Logger.getLogger(DB.class.getName());

    /** Placeholder group for one row of the multi-row insert into {@code videos} (8 columns). */
    private static final String VIDEO_ROW = "(?,?,?,?,?,?,?,?)";

    private final String server = Config.prop.getProperty("db.host", "localhost");
    private final String user = Config.prop.getProperty("db.user", "ytcrawler");
    private final String pw = Config.prop.getProperty("db.pw", "");
    private final String db = Config.prop.getProperty("db.dbname", "ytcrawler");
    private final int port = Integer.parseInt(Config.prop.getProperty("db.port", "3306"));

    private Connection con;

    /** Random video ids handed out by {@link #getRandom()}; also the lock guarding {@link #randomrefill}. */
    private final List<String> randombuffer = new ArrayList<>(100);
    /** The running refill thread, or {@code null} when no refill is in progress. */
    private Thread randomrefill = null;
    /** Cached row count of {@code videos}; read once at start-up and adjusted on insert/delete. */
    private int dbsize = 0;

    /** Number of buffered entries above which a write buffer is flushed. */
    private int writebuffersize = 500;
    /**
     * Column values of the buffered videos, eight per video in insert order. The values are
     * copied when a video is buffered, so later changes to the Video object do not affect
     * what gets written.
     */
    private final List<Object> pendingVideoParams = new ArrayList<>();
    private int pendingVideoCount = 0;
    /** Video ids waiting to be written to the {@code temp} table. */
    private final List<String> pendingTempIds = new ArrayList<>();

    /**
     * Connects to the server, selects the configured database (creating it and its tables
     * if it is missing), starts the first refill of the random buffer and reads the current
     * size of the {@code videos} table.
     */
    public DB() {
        readWriteBufferSize();
        connect(false);
        if (con == null) {
            // connect() already logged the cause; without a connection nothing below can work
            log.error("No database connection available, skipping database setup.");
            return;
        }
        try {
            if (!selectExistingDatabase()) {
                createSchema();
            }
            // NOTE: this starts a thread that uses this object before the constructor returns
            refillbuffer();
            dbsize();
        } catch (SQLException e) {
            log.error("Error while connecting to the database! ", e);
        }
    }

    /** Reads {@code db.writebuffersize} from the config; keeps the default if it is missing or not a number. */
    private void readWriteBufferSize() {
        try {
            writebuffersize = Integer.parseInt(Config.prop.getProperty("db.writebuffersize"));
        } catch (NumberFormatException e) {
            log.warn("could not read the number \"" + Config.prop.getProperty("db.writebuffersize") + "\" from the config file. db.writebuffersize");
        }
    }

    /**
     * Looks for the configured database among the server's catalogs and selects it.
     *
     * @return {@code true} if the database exists and was selected
     */
    private boolean selectExistingDatabase() throws SQLException {
        try (ResultSet catalogs = con.getMetaData().getCatalogs()) {
            while (catalogs.next()) {
                if (catalogs.getString(1).equalsIgnoreCase(db)) {
                    con.setCatalog(db);
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Creates the database and the tables {@code videos} and {@code temp}.
     * The stored procedure used by {@link #deleteDouble()} is not created here.
     */
    private void createSchema() throws SQLException {
        log.warn("Database not found! tring to create!");
        update("CREATE DATABASE `" + db + "` /*!40100 DEFAULT CHARACTER SET latin1*/;");
        con.setCatalog(db);
        // videotitle/channel/tags are written by addVideos/updateVideo and read by getUncompleted,
        // so they must exist. Widths follow the SUBSTR limits used in updateVideo; no limit is
        // visible for tags, hence TEXT. NOTE(review): align with the production schema.
        update("CREATE TABLE `videos` (`id` varchar(13) NOT NULL,`length` int(11) NOT NULL,`created` int(11) NOT NULL,`langcode` varchar(3) NOT NULL DEFAULT 'en',`category` int(11) DEFAULT NULL,`videotitle` varchar(100) DEFAULT NULL,`channel` varchar(20) DEFAULT NULL,`tags` text, PRIMARY KEY (`id`), UNIQUE KEY `id_UNIQUE` (`id`)) ENGINE=InnoDB DEFAULT CHARSET=latin1;");
        update("CREATE TABLE `temp` ( `ytid` varchar(13) NOT NULL COMMENT 'a Table to store Video ids, when they are found to process them later', PRIMARY KEY (`ytid`), UNIQUE KEY `ytid_UNIQUE` (`ytid`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;");
        log.info("Database is set up!");
    }

    /** Reads the row count of {@code videos} into {@link #dbsize}; leaves it unchanged on failure. */
    private void dbsize() {
        try (ResultSet set = query("SELECT count(*) as count FROM `videos`;")) {
            if (set != null && set.next()) {
                dbsize = set.getInt(1);
            }
        } catch (SQLException e) {
            log.warn("could not read the size of the videos table", e);
        }
    }

    /**
     * @return the cached number of rows in {@code videos}
     */
    public int getDBSize() {
        return dbsize;
    }

    /**
     * Opens a new connection and stores it in {@link #con}. On failure the error is logged
     * and {@link #con} keeps its previous value.
     *
     * @param selectdb whether the configured database is selected in the connection URL
     */
    private void connect(boolean selectdb) {
        try {
            con = DriverManager.getConnection("jdbc:mysql://" + server + ":" + port + "/" + (selectdb ? db : "") + "?serverTimezone=UTC&verifyServerCertificate=false&useSSL=true&useUnicode=true&characterEncoding=utf-8", user, pw);
        } catch (SQLException e) {
            log.error("Error while connecting to the database! ", e);
        }
    }

    /**
     * Reconnects if there is no open connection.
     *
     * @throws SQLException if there is still no connection afterwards
     */
    private void ensureConnected() throws SQLException {
        if (con == null || con.isClosed()) {
            connect(true);
        }
        if (con == null) {
            throw new SQLException("no database connection available");
        }
    }

    /**
     * Builds {@code count} copies of {@code group} separated by commas,
     * e.g. {@code placeholders("?", 3)} gives {@code ?,?,?}.
     */
    private static String placeholders(String group, int count) {
        StringBuilder sb = new StringBuilder(count * (group.length() + 1));
        for (int i = 0; i < count; i++) {
            if (i > 0) {
                sb.append(',');
            }
            sb.append(group);
        }
        return sb.toString();
    }

    /**
     * Runs a data-modifying statement with bound parameters and closes the statement.
     *
     * @param sql    statement text with {@code ?} placeholders
     * @param params values bound to the placeholders in order
     * @return the affected row count, or {@code -1} if the statement failed (the failure is logged)
     */
    private int execute(String sql, Object... params) {
        try {
            ensureConnected();
            try (PreparedStatement st = con.prepareStatement(sql)) {
                for (int i = 0; i < params.length; i++) {
                    st.setObject(i + 1, params[i]);
                }
                return st.executeUpdate();
            }
        } catch (MysqlDataTruncation truncated) {
            log.info("truncated.", truncated);
        } catch (SQLException e) {
            log.error("Fehler bim ausführen der Update-Query: " + sql, e);
        }
        return -1;
    }

    /**
     * Removes all videos that are already stored in the db from the given list.
     *
     * @param input video ids to check; the list is modified in place
     * @return {@code input} without the ids found in {@code videos}; an empty list if
     *         {@code input} is {@code null}. If the lookup fails, {@code input} is returned unchanged.
     */
    public List<String> checkvideos(List<String> input) {
        if (input == null) {
            return new ArrayList<>();
        }
        if (input.isEmpty()) {
            return input;
        }
        String sql = "SELECT `id` FROM `videos` WHERE `id` IN (" + placeholders("?", input.size()) + ");";
        Set<String> known = new HashSet<>();
        try {
            ensureConnected();
            try (PreparedStatement st = con.prepareStatement(sql)) {
                int idx = 1;
                for (String id : input) {
                    st.setString(idx++, id);
                }
                try (ResultSet res = st.executeQuery()) {
                    while (res.next()) {
                        known.add(res.getString(1));
                    }
                }
            }
        } catch (SQLException e) {
            log.error("Fehler bim ausführen der Query: " + sql, e);
        }
        input.removeAll(known);
        return input;
    }

    /**
     * Buffers the given videos and writes the buffer to {@code videos} once it holds more
     * than {@code db.writebuffersize} entries, or immediately when {@code force} is set.
     *
     * @param input videos to buffer; may be {@code null} (useful with {@code force} to flush only),
     *              {@code null} elements are skipped
     * @param force write the buffer even if it is not full
     */
    public void addVideos(ArrayList<Video> input, boolean force) {
        if (input != null) {
            for (Video v : input) {
                if (v == null) {
                    continue;
                }
                pendingVideoParams.add(v.id);
                pendingVideoParams.add(v.length);
                pendingVideoParams.add(v.created);
                pendingVideoParams.add(v.languageCode);
                pendingVideoParams.add(v.categorie);
                pendingVideoParams.add(v.title);
                pendingVideoParams.add(v.channel);
                pendingVideoParams.add(v.tags);
                pendingVideoCount++;
            }
        }
        if ((pendingVideoCount > writebuffersize || force) && pendingVideoCount > 0) {
            flushVideos();
        }
    }

    /** Writes all buffered videos with one multi-row insert and empties the buffer, even if the insert fails. */
    private void flushVideos() {
        log.info("Write databuffer to DB video count: " + pendingVideoCount);
        String qu = "INSERT IGNORE INTO `videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES "
                + placeholders(VIDEO_ROW, pendingVideoCount);
        int inserted = execute(qu, pendingVideoParams.toArray());
        if (inserted > 0) {
            // INSERT IGNORE skips ids that already exist, so count what was really inserted
            dbsize += inserted;
        }
        pendingVideoParams.clear();
        pendingVideoCount = 0;
    }

    /**
     * Updates the stored data of every non-null video in the list.
     *
     * @param input videos whose rows should be updated
     */
    public void updateVideos(List<Video> input) {
        log.info("Updateing " + input.size() + " videos.");
        for (Video v : input) {
            if (v != null) {
                updateVideo(v);
            }
        }
    }

    /**
     * Updates the row of one video. Videos without id, title, channel or tags are skipped
     * (and logged) instead of being written half-filled.
     */
    private void updateVideo(Video v) {
        if (v.id == null || v.title == null || v.channel == null || v.tags == null) {
            log.warn("skipping update of incomplete video " + v.id);
            return;
        }
        // COALESCE keeps the stored langcode when none is known (the column is NOT NULL)
        String qu = "UPDATE `videos` SET `length` = ?, `created` = ?, `langcode` = COALESCE(SUBSTR(?, 1, 3), `langcode`), `category` = ?, `videotitle` = SUBSTR(?, 1, 100), `channel` = SUBSTR(?, 1, 20), `tags` = ? WHERE `id` = ?;";
        execute(qu, v.length, v.created, v.languageCode, v.categorie, v.title, v.channel, v.tags, v.id);
    }

    /**
     * Reads ids of videos whose {@code channel} column is still {@code NULL}.
     *
     * @param limit  maximum number of ids to return
     * @param offset number of matching rows to skip
     * @return the ids found; empty if there are none or the query failed
     */
    public LinkedList<String> getUncompleted(int limit, int offset) {
        LinkedList<String> out = new LinkedList<>();
        String sql = "SELECT `id` FROM `videos` WHERE `channel` IS NULL LIMIT " + offset + "," + limit + ";";
        try (ResultSet resu = query(sql)) {
            while (resu != null && resu.next()) {
                out.add(resu.getString(1));
            }
        } catch (SQLException e) {
            log.warn("error while reading uncompleted videos", e);
        }
        return out;
    }

    /**
     * Deletes the rows of the given videos from {@code videos}.
     *
     * @param vids videos to delete; {@code null} entries and entries without id are skipped
     */
    public void removeVideos(LinkedList<Video> vids) {
        log.info("Delete " + vids.size() + " videos.");
        for (Video s : vids) {
            if (s == null || s.id == null) {
                continue;
            }
            int deleted = execute("DELETE FROM `videos` WHERE `id`=?;", s.id);
            if (deleted > 0) {
                dbsize -= deleted;
            }
        }
    }

    /**
     * Runs a query immediately.
     *
     * <p>The statement is closed automatically once the returned result set is closed,
     * so the caller has to close the result set.
     *
     * @param q the SQL text
     * @return the result set of the query, or {@code null} if it failed (the failure is logged)
     */
    public ResultSet query(String q) {
        PreparedStatement st = null;
        try {
            ensureConnected();
            st = con.prepareStatement(q);
            st.closeOnCompletion();
            return st.executeQuery();
        } catch (SQLException e) {
            log.error("Fehler bim ausführen der Query: " + q, e);
            if (st != null) {
                try {
                    st.close();
                } catch (SQLException ignored) {
                    // the original failure has already been reported
                }
            }
        }
        return null;
    }

    /**
     * Runs a data-modifying statement immediately. Failures are logged, not thrown.
     *
     * @param q the SQL text
     */
    public void update(String q) {
        execute(q);
    }

    /** Starts the refill thread unless one is already running. */
    private void refillbuffer() {
        synchronized (randombuffer) {
            if (randomrefill == null) {
                randomrefill = new Thread(this, "Randomrefill");
                randomrefill.start();
            }
        }
    }

    /**
     * Takes one id out of the random buffer and triggers a refill when fewer than 10 are left.
     *
     * @return a video id, or {@code null} if the buffer is empty
     */
    public String getRandom() {
        log.info("Get random Video");
        synchronized (randombuffer) {
            if (randombuffer.size() < 10) {
                refillbuffer();
            }
            if (randombuffer.isEmpty()) {
                log.warn("randombuffer is empty!");
                return null;
            }
            return randombuffer.remove(0);
        }
    }

    /**
     * @return the number of ids currently in the random buffer
     */
    public int getRandomCount() {
        synchronized (randombuffer) {
            return randombuffer.size();
        }
    }

    /**
     * Reads up to 500 ids from the {@code temp} table and deletes exactly the ids that were read.
     *
     * @return the ids read; empty if the table is empty or the read failed
     */
    public LinkedList<String> restoreTemp() {
        LinkedList<String> out = new LinkedList<>();
        log.info("RestoreTemp");
        try (ResultSet res = query("SELECT * FROM `temp` LIMIT 500;")) {
            while (res != null && res.next()) {
                out.add(res.getString(1));
            }
        } catch (SQLException e) {
            log.warn("error while reading the temp table", e);
        }
        if (!out.isEmpty()) {
            // delete by id: a second "LIMIT 500" is not guaranteed to hit the rows read above
            execute("DELETE FROM `temp` WHERE `ytid` IN (" + placeholders("?", out.size()) + ");", out.toArray());
        }
        return out;
    }

    /**
     * Calls the stored procedure {@code deletedouble()} and logs how long it took.
     * The procedure has to exist in the database; it is not created by this class.
     */
    public void deleteDouble() {
        log.info("Started Delete Double");
        long start = System.currentTimeMillis();
        update("CALL deletedouble();");
        log.info("Delete Double done in " + ((System.currentTimeMillis() - start) / 60000) + " min");
    }

    /**
     * Buffers the given ids and writes the buffer to {@code temp} once it holds more than
     * {@code db.writebuffersize} entries, or immediately when {@code force} is set.
     *
     * @param strings video ids to buffer; may be {@code null} (useful with {@code force} to flush only),
     *                {@code null} elements are skipped
     * @param force   write the buffer even if it is not full
     */
    public void storeTemp(ArrayList<String> strings, boolean force) {
        if (strings != null && !strings.isEmpty()) {
            log.info("store Temp to buffer: " + strings.size());
            for (String s : strings) {
                if (s != null) {
                    pendingTempIds.add(s);
                }
            }
        }
        // never flush an empty buffer: there is nothing to insert
        if ((pendingTempIds.size() > writebuffersize || force) && !pendingTempIds.isEmpty()) {
            log.info("Write Buffer: " + pendingTempIds.size());
            String qu = "INSERT IGNORE INTO `temp` (`ytid`) VALUES " + placeholders("(?)", pendingTempIds.size()) + ";";
            execute(qu, pendingTempIds.toArray());
            pendingTempIds.clear();
        }
    }

    /**
     * Writes both write buffers to the database and closes the connection.
     * Does nothing if there is no open connection.
     */
    public void stop() {
        try {
            if (con != null && !con.isClosed()) {
                addVideos(null, true);
                storeTemp(null, true);
                con.close();
            }
        } catch (SQLException e) {
            log.warn("error while closing the database connection", e);
        }
    }

    /**
     * Body of the refill thread: loads up to 100 random video ids into the random buffer.
     */
    @Override
    public void run() {
        log.info("Started Refilling.");
        List<String> fetched = new ArrayList<>(100);
        boolean complete = false;
        try {
            // the query runs outside the lock so getRandom() is not blocked while it executes
            try (ResultSet set = query("SELECT `id` FROM `videos` ORDER BY rand() LIMIT 100;")) {
                if (set != null) {
                    while (set.next()) {
                        fetched.add(set.getString(1));
                    }
                    complete = true;
                }
            } catch (SQLException e) {
                log.warn("error getting a random video", e);
            }
        } finally {
            synchronized (randombuffer) {
                randombuffer.addAll(fetched);
                if (complete) {
                    log.info("refilled randombuffer to " + randombuffer.size() + " videos.");
                }
                if (randombuffer.isEmpty()) {
                    log.error("Unable to retrieve RandomVideos");
                }
                // always reset, otherwise a failed refill would prevent every later one
                randomrefill = null;
            }
        }
    }
}