package de.mrbesen.youtubecrawler;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import org.apache.log4j.Logger;

import com.mysql.cj.jdbc.exceptions.MysqlDataTruncation;

import de.mrbesen.youtubecrawler.Crawler.Video;
public class DB {
private Connection con ;
private String server = Config . prop . getProperty ( " db.host " , " localhost " ) , user = Config . prop . getProperty ( " db.user " , " ytcrawler " ) , pw = Config . prop . getProperty ( " db.pw " , " " ) , db = Config . prop . getProperty ( " db.dbname " , " ytcrawler " ) ;
private int port = Integer . parseInt ( Config . prop . getProperty ( " db.port " , " 3306 " ) ) ;
2018-07-15 22:09:37 +02:00
private Logger log = Logger . getLogger ( DB . class . getName ( ) ) ;
2018-07-15 21:30:12 +02:00
2018-07-16 12:43:56 +02:00
public DB ( ) {
2018-07-15 21:30:12 +02:00
try {
2018-07-16 12:43:56 +02:00
connect ( false ) ;
2018-07-16 23:22:32 +02:00
2018-07-15 21:30:12 +02:00
//set the database up!
boolean found = false ;
ResultSet set = con . getMetaData ( ) . getCatalogs ( ) ; //does the db exists?
while ( set . next ( ) ) {
if ( set . getString ( 1 ) . equalsIgnoreCase ( db ) ) {
found = true ;
con . setCatalog ( db ) ;
break ;
}
}
if ( ! found ) { //DataBase not found, try to create
log . warn ( " Database not found! tring to create! " ) ;
//create DB, table: konten / player / Transactions
update ( " CREATE DATABASE ` " + db + " ` /*!40100 DEFAULT CHARACTER SET latin1*/; " ) ;
con . setCatalog ( db ) ;
update ( " CREATE TABLE `videos` (`id` varchar(13) NOT NULL,`length` int(11) NOT NULL,`created` int(11) NOT NULL,`langcode` varchar(3) NOT NULL DEFAULT 'en',`category` int(11) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `id_UNIQUE` (`id`)) ENGINE=InnoDB DEFAULT CHARSET=latin1; " ) ;
2018-07-23 12:27:51 +02:00
update ( " CREATE TABLE `temp` ( `ytid` varchar(13) NOT NULL COMMENT 'a Table to store Video ids, when they are found to process them later', PRIMARY KEY (`ytid`), UNIQUE KEY `ytid_UNIQUE` (`ytid`)) ENGINE=InnoDB DEFAULT CHARSET=utf8; " ) ;
2018-09-11 00:05:10 +02:00
2018-07-16 23:22:32 +02:00
log . info ( " Database is set up! " ) ;
2018-07-23 12:27:51 +02:00
}
2018-07-16 12:43:56 +02:00
} catch ( SQLException e ) {
2018-07-15 21:30:12 +02:00
log . error ( " Error while connecting to the database! " , e ) ;
}
2018-07-16 12:43:56 +02:00
}
2018-07-15 21:30:12 +02:00
2018-07-16 12:43:56 +02:00
private void connect ( boolean selectdb ) {
try {
Class . forName ( " com.mysql.jdbc.Driver " ) ; //Treiber laden try this driver: com.mysql.cj.jdbc.Driver
//verbinden
con = DriverManager . getConnection ( " jdbc:mysql:// " + server + " : " + port + " / " + ( selectdb ? db : " " ) + " ?serverTimezone=UTC " , user , pw ) ;
} catch ( ClassNotFoundException | SQLException e ) {
log . error ( " Error while connecting to the database! " , e ) ;
}
2018-07-15 21:30:12 +02:00
}
/ * *
* removes all videos , that are known from the db
* @param input
* @return
* /
public List < String > checkvideos ( List < String > input ) {
2018-07-16 23:22:32 +02:00
if ( ! input . isEmpty ( ) ) {
StringBuilder ids = new StringBuilder ( ) ;
for ( int i = 0 ; i < input . size ( ) ; i + + ) {
ids . append ( ',' ) . append ( input . get ( i ) ) ;
}
String query = " SELECT `id` FROM `videos` WHERE concat('%',`id`,'%') LIKE ' " + ids . toString ( ) + " '; " ;
ResultSet res = query ( query ) ;
try {
while ( res . next ( ) ) {
input . remove ( res . getString ( 1 ) ) ;
}
} catch ( SQLException e ) {
e . printStackTrace ( ) ;
2018-07-15 21:30:12 +02:00
}
}
return input ;
}
/ * *
* save the list of videos to the DB
* @param input
* /
public void addVideos ( List < Video > input ) {
2018-07-17 13:42:06 +02:00
//log.info("add " + input.size() + " videos");
2018-07-15 21:30:12 +02:00
if ( input . size ( ) > 0 ) {
StringBuilder sb = new StringBuilder ( ) ;
for ( int i = 0 ; i < input . size ( ) ; i + + ) {
if ( i > 0 )
sb . append ( ',' ) ;
Video v = input . get ( i ) ;
2018-09-11 00:05:10 +02:00
if ( v ! = null )
sb . append ( " (' " ) . append ( v . id ) . append ( " ',' " ) . append ( v . length ) . append ( " ',' " ) . append ( v . created ) . append ( " ',' " ) . append ( v . languageCode ) . append ( " ',' " ) . append ( v . categorie ) . append ( " ',' " ) . append ( v . title ) . append ( " ',' " ) . append ( v . channel ) . append ( " ',' " ) . append ( v . tags ) . append ( " ') " ) ;
2018-07-20 19:57:43 +02:00
}
if ( sb . length ( ) > 2 ) {
String qu = " INSERT IGNORE INTO `ytcrawler`.`videos`(`id`, `length`, `created`, `langcode`, `category`, `videotitle`, `channel`, `tags`) VALUES " + sb . toString ( ) ;
update ( qu ) ;
2018-07-15 21:30:12 +02:00
}
2018-07-20 19:57:43 +02:00
}
}
2018-07-23 12:27:51 +02:00
2018-07-20 19:57:43 +02:00
public void updateVideos ( List < Video > input ) {
log . info ( " Updateing " + input . size ( ) + " videos. " ) ;
for ( Video v : input ) {
if ( v ! = null )
updateVideo ( v ) ;
}
}
2018-07-23 12:27:51 +02:00
2018-07-20 19:57:43 +02:00
private void updateVideo ( Video v ) {
try {
String qu = " UPDATE `ytcrawler`.`videos` SET `length` = ' " + v . length + " ', `created` = ' " + v . created + " ', `langcode` = SUBSTR(' " + v . languageCode + " ', 1, 3) ,`category` = ' " + v . categorie + " ',`videotitle` = SUBSTR(' " + v . title + " ',1,100),`channel` = SUBSTR(' " + v . channel + " ',1,20),`tags` = ' " + v . tags . substring ( 0 , v . tags . length ( ) > 40 ? 40 : v . tags . length ( ) ) + " ' WHERE `id` = ' " + v . id + " '; " ;
2018-07-15 21:30:12 +02:00
update ( qu ) ;
2018-07-20 19:57:43 +02:00
} catch ( NullPointerException e ) {
2018-07-23 12:27:51 +02:00
2018-07-20 19:57:43 +02:00
}
}
2018-07-23 12:27:51 +02:00
2018-07-20 19:57:43 +02:00
public LinkedList < String > getUncompleted ( int limit , int offset ) {
LinkedList < String > out = new LinkedList < > ( ) ;
String sql = " SELECT `id` FROM `videos` WHERE `channel` IS NULL LIMIT " + offset + " , " + limit + " ; " ;
ResultSet resu = query ( sql ) ;
try {
while ( resu . next ( ) ) {
out . add ( resu . getString ( 1 ) ) ;
}
} catch ( SQLException e ) {
log . info ( " error " , e ) ;
}
return out ;
}
2018-07-23 12:27:51 +02:00
2018-07-20 19:57:43 +02:00
public void removeVideos ( LinkedList < Video > vids ) {
log . info ( " Delete " + vids . size ( ) + " videos. " ) ;
for ( Video s : vids ) {
update ( " DELETE FROM `ytcrawler`.`videos` WHERE `id`=' " + s . id + " '; " ) ;
2018-07-15 21:30:12 +02:00
}
}
/ * *
* instant query
* @param q
* @return Das resultSet der Query
* /
2018-07-23 12:27:51 +02:00
public ResultSet query ( String q ) {
2018-07-15 21:30:12 +02:00
try {
2018-07-16 12:43:56 +02:00
if ( con . isClosed ( ) ) {
connect ( true ) ;
}
2018-07-15 21:30:12 +02:00
return con . prepareStatement ( q ) . executeQuery ( ) ;
} catch ( SQLException e ) {
log . error ( " Fehler bim ausführen der Query: " + q , e ) ;
}
return null ; //ERROR!
}
/ * *
* instant update
* @param q
* /
public void update ( String q ) {
try {
2018-07-16 12:43:56 +02:00
if ( con . isClosed ( ) ) {
connect ( true ) ;
}
2018-07-15 21:30:12 +02:00
con . prepareStatement ( q ) . executeUpdate ( ) ;
2018-07-20 19:57:43 +02:00
} catch ( MysqlDataTruncation ignore ) {
log . info ( " truncated. " , ignore ) ;
2018-07-15 21:30:12 +02:00
} catch ( SQLException e ) {
log . error ( " Fehler bim ausführen der Update-Query: " + q , e ) ;
}
}
2018-07-19 17:59:26 +02:00
public String getRandom ( ) {
ResultSet set = query ( " SELECT `id`, rand() as 'r' FROM `videos` ORDER BY r LIMIT 1; " ) ;
if ( set = = null )
return null ;
try {
if ( set . next ( ) ) {
return set . getString ( 1 ) ;
}
} catch ( SQLException e ) {
log . warn ( " error getting a random video " , e ) ;
}
return null ;
}
2018-07-23 12:27:51 +02:00
public LinkedList < String > restoreTemp ( ) {
2018-09-11 00:05:10 +02:00
ResultSet res = query ( " SELECT * FROM `ytcrawler`.`temp` LIMIT 500; " ) ;
2018-07-23 12:27:51 +02:00
LinkedList < String > out = new LinkedList < > ( ) ;
log . info ( " RestoreTemp " ) ;
try {
while ( res . next ( ) ) {
out . add ( res . getString ( 1 ) ) ;
}
2018-09-11 00:05:10 +02:00
update ( " DELETE FROM `ytcrawler`.`temp` LIMIT 500; " ) ;
2018-07-23 12:27:51 +02:00
} catch ( Exception e ) { }
return out ;
}
2018-09-11 00:05:10 +02:00
public void deleteDouble ( ) {
update ( " call ytcrawler.deletedouble(); " ) ;
}
2018-07-23 12:27:51 +02:00
public void storeTemp ( LinkedList < String > strings ) {
if ( ! strings . isEmpty ( ) ) {
StringBuilder sb = new StringBuilder ( ) ;
for ( String s : strings ) {
sb . append ( " '), (' " ) . append ( s ) ;
}
update ( " INSERT IGNORE INTO `ytcrawler`.`temp` (`ytid`) VALUES (' " + sb . substring ( 6 ) . toString ( ) + " '); " ) ;
}
}
2018-07-15 21:30:12 +02:00
}