2018-07-15 21:30:12 +02:00
package de.mrbesen.youtubecrawler ;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Types;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;

import org.apache.log4j.Logger;

import com.mysql.cj.jdbc.exceptions.MysqlDataTruncation;

import de.mrbesen.youtubecrawler.Crawler.Video;
2018-09-11 12:12:51 +02:00
public class DB implements Runnable {
2018-07-15 21:30:12 +02:00
private Connection con ;
private String server = Config . prop . getProperty ( " db.host " , " localhost " ) , user = Config . prop . getProperty ( " db.user " , " ytcrawler " ) , pw = Config . prop . getProperty ( " db.pw " , " " ) , db = Config . prop . getProperty ( " db.dbname " , " ytcrawler " ) ;
private int port = Integer . parseInt ( Config . prop . getProperty ( " db.port " , " 3306 " ) ) ;
2018-07-15 22:09:37 +02:00
private Logger log = Logger . getLogger ( DB . class . getName ( ) ) ;
2018-09-11 12:12:51 +02:00
private ArrayList < String > randombuffer = new ArrayList < > ( 100 ) ;
private Random rand = new Random ( ) ;
private Thread randomrefill = null ;
2018-10-08 11:22:45 +02:00
private int dbsize = 0 ;
2018-11-14 11:39:21 +01:00
2018-10-12 20:03:53 +02:00
private StringBuilder tostorebuffer ;
2018-10-12 17:22:36 +02:00
private int writebuffersize = 500 ;
2018-10-12 20:03:53 +02:00
private int writebuffercurrentsize = 0 ;
2018-11-14 11:39:21 +01:00
private StringBuilder totempbuffer ;
private int writetempbuffercurrentsize = 0 ;
2018-07-15 21:30:12 +02:00
2018-07-16 12:43:56 +02:00
public DB ( ) {
2018-07-15 21:30:12 +02:00
try {
2018-07-16 12:43:56 +02:00
connect ( false ) ;
2018-07-16 23:22:32 +02:00
2018-07-15 21:30:12 +02:00
//set the database up!
boolean found = false ;
ResultSet set = con . getMetaData ( ) . getCatalogs ( ) ; //does the db exists?
while ( set . next ( ) ) {
if ( set . getString ( 1 ) . equalsIgnoreCase ( db ) ) {
found = true ;
con . setCatalog ( db ) ;
break ;
}
}
if ( ! found ) { //DataBase not found, try to create
log . warn ( " Database not found! tring to create! " ) ;
//create DB, table: konten / player / Transactions
update ( " CREATE DATABASE ` " + db + " ` /*!40100 DEFAULT CHARACTER SET latin1*/; " ) ;
con . setCatalog ( db ) ;
update ( " CREATE TABLE `videos` (`id` varchar(13) NOT NULL,`length` int(11) NOT NULL,`created` int(11) NOT NULL,`langcode` varchar(3) NOT NULL DEFAULT 'en',`category` int(11) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `id_UNIQUE` (`id`)) ENGINE=InnoDB DEFAULT CHARSET=latin1; " ) ;
2018-07-23 12:27:51 +02:00
update ( " CREATE TABLE `temp` ( `ytid` varchar(13) NOT NULL COMMENT 'a Table to store Video ids, when they are found to process them later', PRIMARY KEY (`ytid`), UNIQUE KEY `ytid_UNIQUE` (`ytid`)) ENGINE=InnoDB DEFAULT CHARSET=utf8; " ) ;
2018-09-11 00:05:10 +02:00
2018-07-16 23:22:32 +02:00
log . info ( " Database is set up! " ) ;
2018-07-23 12:27:51 +02:00
}
2018-10-12 17:22:36 +02:00
2018-09-11 12:12:51 +02:00
refillbuffer ( ) ;
2018-10-12 17:22:36 +02:00
2018-10-08 11:22:45 +02:00
//get db size
dbsize ( ) ;
2018-10-12 17:22:36 +02:00
//config data
try {
writebuffersize = Integer . parseInt ( Config . prop . getProperty ( " db.writebuffersize " ) ) ;
} catch ( NumberFormatException e ) {
log . warn ( " could not read the number \" " + Config . prop . getProperty ( " db.writebuffersize " ) + " \" from the config file. db.writebuffersize " ) ;
}
2018-10-12 20:03:53 +02:00
tostorebuffer = new StringBuilder ( writebuffersize ) ;
2018-11-14 11:39:21 +01:00
totempbuffer = new StringBuilder ( writebuffersize ) ;
2018-07-16 12:43:56 +02:00
} catch ( SQLException e ) {
2018-07-15 21:30:12 +02:00
log . error ( " Error while connecting to the database! " , e ) ;
}
2018-07-16 12:43:56 +02:00
}
2018-10-12 17:22:36 +02:00
2018-10-08 11:22:45 +02:00
private void dbsize ( ) {
try {
2018-10-12 17:22:36 +02:00
ResultSet set = query ( " SELECT count(*) as count FROM `videos`; " ) ;
2018-10-08 11:22:45 +02:00
if ( set ! = null ) {
if ( set . next ( ) ) {
dbsize = set . getInt ( 1 ) ;
}
}
} catch ( SQLException e ) {
e . printStackTrace ( ) ;
}
}
2018-10-12 17:22:36 +02:00
2018-10-08 11:22:45 +02:00
public int getDBSize ( ) {
return dbsize ;
}
2018-07-16 12:43:56 +02:00
private void connect ( boolean selectdb ) {
try {
//verbinden
2021-10-18 22:04:52 +02:00
con = DriverManager . getConnection ( " jdbc:mysql:// " + server + " : " + port + " / " + ( selectdb ? db : " " ) + " ?serverTimezone=UTC&verifyServerCertificate=false&useSSL=true&useUnicode=true&characterEncoding=utf-8 " , user , pw ) ;
2021-10-18 15:18:37 +02:00
} catch ( SQLException e ) {
2018-07-16 12:43:56 +02:00
log . error ( " Error while connecting to the database! " , e ) ;
}
2018-07-15 21:30:12 +02:00
}
/ * *
* removes all videos , that are known from the db
* @param input
* @return
* /
public List < String > checkvideos ( List < String > input ) {
2018-07-16 23:22:32 +02:00
if ( ! input . isEmpty ( ) ) {
StringBuilder ids = new StringBuilder ( ) ;
for ( int i = 0 ; i < input . size ( ) ; i + + ) {
ids . append ( ',' ) . append ( input . get ( i ) ) ;
}
String query = " SELECT `id` FROM `videos` WHERE concat('%',`id`,'%') LIKE ' " + ids . toString ( ) + " '; " ;
ResultSet res = query ( query ) ;
try {
while ( res . next ( ) ) {
input . remove ( res . getString ( 1 ) ) ;
}
} catch ( SQLException e ) {
e . printStackTrace ( ) ;
2018-07-15 21:30:12 +02:00
}
}
return input ;
}
/ * *
* save the list of videos to the DB
* @param input
* /
2018-10-12 20:03:53 +02:00
public void addVideos ( ArrayList < Video > input , boolean force ) {
2018-10-12 17:22:36 +02:00
if ( input ! = null ) {
if ( input . size ( ) > 0 ) {
2018-10-21 21:57:36 +02:00
writebuffercurrentsize + = input . size ( ) ;
2018-10-12 20:03:53 +02:00
for ( int i = 0 ; i < input . size ( ) ; i + + ) {
Video v = input . get ( i ) ;
if ( v ! = null )
2021-10-24 23:10:37 +02:00
tostorebuffer . append ( " ,(' " ) . append ( escape ( v . id ) ) . append ( " ', " ) . append ( v . length ) . append ( " , " ) . append ( v . created ) . append ( " ,' " ) . append ( escape ( v . languageCode ) ) . append ( " ', " ) . append ( v . categorie ) . append ( " ,' " ) . append ( escape ( v . title ) ) . append ( " ',' " ) . append ( escape ( v . channel ) ) . append ( " ',' " ) . append ( escape ( v . tags ) ) . append ( " ') " ) ;
2018-10-12 20:03:53 +02:00
}
2018-10-12 17:22:36 +02:00
}
}
2018-10-12 20:03:53 +02:00
if ( writebuffercurrentsize > writebuffersize | | force ) {
if ( tostorebuffer . length ( ) > 10 ) {
2018-11-14 11:39:21 +01:00
log . info ( " Write databuffer to DB video count: " + writebuffercurrentsize ) ;
2018-10-12 20:03:53 +02:00
dbsize + = writebuffercurrentsize ;
tostorebuffer . deleteCharAt ( 0 ) ; //delete leading ','
2021-10-18 17:19:22 +02:00
String qu = " INSERT IGNORE INTO `videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + tostorebuffer . toString ( ) ;
2018-07-20 19:57:43 +02:00
update ( qu ) ;
2018-11-14 11:39:21 +01:00
//reset buffer
writebuffercurrentsize = 0 ;
tostorebuffer = new StringBuilder ( writebuffersize ) ;
2018-07-15 21:30:12 +02:00
}
2018-07-20 19:57:43 +02:00
}
}
2018-07-23 12:27:51 +02:00
2021-10-24 23:10:37 +02:00
private String escape ( String e ) {
return e . replace ( " ' " , " \\ ' " ) ;
}
2018-07-20 19:57:43 +02:00
public void updateVideos ( List < Video > input ) {
log . info ( " Updateing " + input . size ( ) + " videos. " ) ;
for ( Video v : input ) {
if ( v ! = null )
updateVideo ( v ) ;
}
}
2018-07-23 12:27:51 +02:00
2018-07-20 19:57:43 +02:00
private void updateVideo ( Video v ) {
try {
2021-10-24 23:10:37 +02:00
String qu = " UPDATE `videos` SET `length` = " + v . length + " , `created` = " + v . created + " , `langcode` = SUBSTR(' " + v . languageCode + " ', 1, 3) ,`category` = " + v . categorie + " ,`videotitle` = SUBSTR(' " + escape ( v . title ) + " ',1,100),`channel` = SUBSTR(' " + escape ( v . channel ) + " ',1,20),`tags` = ' " + escape ( v . tags ) + " ' WHERE `id` = ' " + escape ( v . id ) + " '; " ;
2018-07-15 21:30:12 +02:00
update ( qu ) ;
2018-07-20 19:57:43 +02:00
} catch ( NullPointerException e ) {
2018-07-23 12:27:51 +02:00
2018-07-20 19:57:43 +02:00
}
}
2018-07-23 12:27:51 +02:00
2018-07-20 19:57:43 +02:00
public LinkedList < String > getUncompleted ( int limit , int offset ) {
LinkedList < String > out = new LinkedList < > ( ) ;
String sql = " SELECT `id` FROM `videos` WHERE `channel` IS NULL LIMIT " + offset + " , " + limit + " ; " ;
ResultSet resu = query ( sql ) ;
try {
while ( resu . next ( ) ) {
out . add ( resu . getString ( 1 ) ) ;
}
} catch ( SQLException e ) {
log . info ( " error " , e ) ;
}
return out ;
}
2018-07-23 12:27:51 +02:00
2018-07-20 19:57:43 +02:00
public void removeVideos ( LinkedList < Video > vids ) {
log . info ( " Delete " + vids . size ( ) + " videos. " ) ;
for ( Video s : vids ) {
2021-10-24 23:10:37 +02:00
update ( " DELETE FROM `videos` WHERE `id`=' " + escape ( s . id ) + " '; " ) ;
2018-07-15 21:30:12 +02:00
}
}
/ * *
* instant query
* @param q
* @return Das resultSet der Query
* /
2018-07-23 12:27:51 +02:00
public ResultSet query ( String q ) {
2018-07-15 21:30:12 +02:00
try {
2018-07-16 12:43:56 +02:00
if ( con . isClosed ( ) ) {
connect ( true ) ;
}
2018-07-15 21:30:12 +02:00
return con . prepareStatement ( q ) . executeQuery ( ) ;
} catch ( SQLException e ) {
log . error ( " Fehler bim ausführen der Query: " + q , e ) ;
}
return null ; //ERROR!
}
/ * *
* instant update
* @param q
* /
public void update ( String q ) {
try {
2018-07-16 12:43:56 +02:00
if ( con . isClosed ( ) ) {
connect ( true ) ;
}
2018-07-15 21:30:12 +02:00
con . prepareStatement ( q ) . executeUpdate ( ) ;
2018-07-20 19:57:43 +02:00
} catch ( MysqlDataTruncation ignore ) {
log . info ( " truncated. " , ignore ) ;
2018-07-15 21:30:12 +02:00
} catch ( SQLException e ) {
log . error ( " Fehler bim ausführen der Update-Query: " + q , e ) ;
}
}
2018-07-19 17:59:26 +02:00
2018-09-11 12:12:51 +02:00
private void refillbuffer ( ) {
if ( randomrefill = = null ) {
randomrefill = new Thread ( this , " Randomrefill " ) ;
randomrefill . start ( ) ;
}
}
2018-10-12 17:22:36 +02:00
2018-07-19 17:59:26 +02:00
public String getRandom ( ) {
2018-09-11 12:12:51 +02:00
log . info ( " Get random Video " ) ;
if ( randombuffer . size ( ) < 10 ) {
refillbuffer ( ) ;
}
if ( randombuffer . isEmpty ( ) ) {
log . warn ( " randombuffer is empty! " ) ;
2018-07-19 17:59:26 +02:00
return null ;
}
2018-09-11 12:12:51 +02:00
return randombuffer . remove ( 0 ) ;
2018-07-19 17:59:26 +02:00
}
2018-07-23 12:27:51 +02:00
2018-10-04 22:52:18 +02:00
public int getRandomCount ( ) {
return randombuffer . size ( ) ;
}
2018-10-12 17:22:36 +02:00
2018-07-23 12:27:51 +02:00
public LinkedList < String > restoreTemp ( ) {
2021-10-18 17:19:22 +02:00
ResultSet res = query ( " SELECT * FROM `temp` LIMIT 500; " ) ;
2018-07-23 12:27:51 +02:00
LinkedList < String > out = new LinkedList < > ( ) ;
log . info ( " RestoreTemp " ) ;
try {
while ( res . next ( ) ) {
out . add ( res . getString ( 1 ) ) ;
}
2021-10-18 17:19:22 +02:00
update ( " DELETE FROM `temp` LIMIT 500; " ) ;
2018-07-23 12:27:51 +02:00
} catch ( Exception e ) { }
return out ;
}
2018-09-11 00:05:10 +02:00
public void deleteDouble ( ) {
2018-09-11 12:12:51 +02:00
log . info ( " Started Delete Double " ) ;
long start = System . currentTimeMillis ( ) ;
2021-10-18 17:19:22 +02:00
update ( " CALL deletedouble(); " ) ;
2018-09-11 12:12:51 +02:00
log . info ( " Delete Double done in " + ( ( System . currentTimeMillis ( ) - start ) / 60000 ) + " min " ) ;
2018-09-11 00:05:10 +02:00
}
2018-11-14 11:39:21 +01:00
public void storeTemp ( ArrayList < String > strings , boolean force ) {
2018-10-11 15:32:19 +02:00
if ( strings = = null )
return ;
2018-07-23 12:27:51 +02:00
if ( ! strings . isEmpty ( ) ) {
2018-11-14 11:39:21 +01:00
log . info ( " store Temp to buffer: " + strings . size ( ) ) ;
writetempbuffercurrentsize + = strings . size ( ) ;
2018-07-23 12:27:51 +02:00
for ( String s : strings ) {
2021-10-24 23:10:37 +02:00
totempbuffer . append ( " , (' " ) . append ( escape ( s ) ) . append ( " ') " ) ;
2018-07-23 12:27:51 +02:00
}
2018-11-14 11:39:21 +01:00
}
if ( writetempbuffercurrentsize > writebuffersize | | force ) {
log . info ( " Write Buffer: " + writetempbuffercurrentsize ) ;
totempbuffer . deleteCharAt ( 0 ) ; //delete leading ','
2021-10-18 17:19:22 +02:00
String qu = " INSERT IGNORE INTO `temp` (`ytid`) VALUES " + totempbuffer . toString ( ) + " ; " ;
2018-11-14 11:39:21 +01:00
update ( qu ) ;
//reset
writetempbuffercurrentsize = 0 ;
totempbuffer = new StringBuilder ( writebuffersize ) ;
2018-07-23 12:27:51 +02:00
}
}
2018-09-11 12:12:51 +02:00
2018-10-12 17:22:36 +02:00
2018-09-11 12:12:51 +02:00
/ * *
* Stops the randomnes - Server and disconnect
* /
public void stop ( ) {
try {
if ( con ! = null ) {
if ( ! con . isClosed ( ) ) {
2018-10-12 17:22:36 +02:00
addVideos ( null , true ) ;
2021-10-25 17:51:46 +02:00
con . commit ( ) ;
2018-09-11 12:12:51 +02:00
con . close ( ) ;
}
}
} catch ( SQLException e ) {
e . printStackTrace ( ) ;
}
}
/ * *
* runable , of Thread for randomrefill
* /
@Override
public void run ( ) {
2018-09-25 23:56:20 +02:00
log . info ( " Started Refilling. " ) ;
2018-09-11 12:12:51 +02:00
try {
2021-10-18 15:36:01 +02:00
ResultSet set = query ( " SELECT `id` FROM `videos` ORDER BY rand() LIMIT 100; " ) ;
if ( set ! = null ) {
while ( set . next ( ) ) {
randombuffer . add ( set . getString ( 1 ) ) ;
2018-09-11 12:12:51 +02:00
}
2021-10-18 15:36:01 +02:00
log . info ( " refilled randombuffer to " + randombuffer . size ( ) + " videos. " ) ;
2018-09-11 12:12:51 +02:00
}
} catch ( SQLException e ) {
log . warn ( " error getting a random video " , e ) ;
}
2021-10-18 15:36:01 +02:00
2018-09-11 12:12:51 +02:00
if ( randombuffer . isEmpty ( ) ) {
log . error ( " Unable to retrieve RandomVideos " ) ;
}
randomrefill = null ;
}
2018-07-15 21:30:12 +02:00
}