178 lines
5.3 KiB
Java
178 lines
5.3 KiB
Java
package de.mrbesen.youtubecrawler;
|
|
|
|
import java.io.BufferedReader;
|
|
import java.io.IOException;
|
|
import java.io.InputStreamReader;
|
|
import java.net.HttpURLConnection;
|
|
import java.net.URL;
|
|
import java.text.DateFormat;
|
|
import java.text.ParseException;
|
|
import java.text.SimpleDateFormat;
|
|
import java.util.*;
|
|
import java.util.stream.Collectors;
|
|
|
|
import javax.xml.datatype.DatatypeConfigurationException;
|
|
import javax.xml.datatype.DatatypeFactory;
|
|
import javax.xml.datatype.Duration;
|
|
|
|
import org.apache.log4j.Logger;
|
|
import org.json.JSONArray;
|
|
import org.json.JSONObject;
|
|
import org.json.JSONTokener;
|
|
|
|
public class YoutubeAPI {
|
|
|
|
private String api_key = null;
|
|
private static String BASEQUERY = "https://www.googleapis.com/youtube/v3/";
|
|
private static DateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
|
|
private Logger log = Logger.getLogger(YoutubeAPI.class.getName());
|
|
private DatatypeFactory durationfactory = null;
|
|
|
|
public YoutubeAPI(String apikey) {
|
|
api_key = apikey;
|
|
try {
|
|
durationfactory = DatatypeFactory.newInstance();
|
|
} catch(DatatypeConfigurationException e) {
|
|
e.printStackTrace();
|
|
System.exit(1);
|
|
}
|
|
}
|
|
|
|
public Crawler.Video getInfo(String id) {
|
|
return getInfos(id)[0].get(0);
|
|
}
|
|
|
|
public List<Crawler.Video>[] getInfos(Collection<String> ids) {
|
|
if(ids.isEmpty())
|
|
return null;
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
boolean isFirst = true;
|
|
for(String id : ids) {
|
|
if(id.matches("[a-zA-Z0-9_-]{11}")) {
|
|
if(!isFirst) {
|
|
sb.append(',');
|
|
}
|
|
sb.append(id);
|
|
isFirst = false;
|
|
} else {
|
|
System.out.println("non matching id: \"" + id + "\"");
|
|
}
|
|
}
|
|
return getInfos(sb.toString());
|
|
}
|
|
|
|
public Map<Integer, String> getCategories() {
|
|
String query = BASEQUERY + "videoCategories?part=snippet®ionCode=us&key=" + api_key;
|
|
JSONObject obj = parse(connect(query));
|
|
Map<Integer, String> out = new TreeMap<>();
|
|
if(obj != null) {
|
|
JSONArray items = obj.getJSONArray("items");
|
|
for(int i = 0; !items.isNull(i); ++i) {
|
|
JSONObject item = items.getJSONObject(i);
|
|
String id = item.getString("id");
|
|
String name = item.getJSONObject("snippet").getString("title");
|
|
try {
|
|
int intid = Integer.parseInt(id);
|
|
out.put(intid, name);
|
|
// System.out.println(intid + ";" + name);
|
|
} catch (NumberFormatException e) {
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
}
|
|
return out;
|
|
}
|
|
|
|
public List<Crawler.Video>[] getInfos(String idlist) {
|
|
ArrayList<Crawler.Video> out = new ArrayList<>(idlist.length() / 12);//approximierte vorraussichtliche länge
|
|
LinkedList<Crawler.Video> livestr = new LinkedList<>();
|
|
String nextpage = "";
|
|
do {
|
|
String query = BASEQUERY + "videos?part=snippet,contentDetails&id=" + idlist + nextpage + "&key=" + api_key;
|
|
JSONObject json = parse(connect(query));
|
|
nextpage = "";
|
|
if(json != null) {
|
|
if(json.has("items")) {
|
|
//get video list
|
|
json.getJSONArray("items").forEach(item -> out.add( getVid((JSONObject) item) ));
|
|
|
|
if(json.has("nextPageToken")) {
|
|
nextpage = "&pageToken=" + json.getString("nextPageToken");
|
|
}
|
|
}
|
|
}
|
|
} while(!nextpage.isEmpty());
|
|
|
|
return new List[] {out, livestr};
|
|
}
|
|
|
|
private Crawler.Video getVid(JSONObject json) {
|
|
String vdid = json.getString("id");
|
|
JSONObject snippet = json.getJSONObject("snippet");
|
|
String title = snippet.optString("title", ""); //maxlen: 100
|
|
long published = getDate(snippet.optString("publishedAt", ""));
|
|
String channel = snippet.optString("channelTitle", "");
|
|
|
|
String tags = "";
|
|
if(snippet.optJSONArray("tags") != null)
|
|
tags = snippet.getJSONArray("tags").toList().stream().map(o -> (String) o).collect(Collectors.joining(",")); // max len: ~500
|
|
|
|
byte category = 0;
|
|
try {
|
|
category = Byte.parseByte(snippet.getString("categoryId"));
|
|
} catch(NumberFormatException e) {}
|
|
|
|
JSONObject contentDetails = json.getJSONObject("contentDetails");
|
|
int duration = (int) getDuration(contentDetails.optString("duration", ""));
|
|
boolean live = !snippet.getString("liveBroadcastContent").equalsIgnoreCase("none");
|
|
|
|
String langCode = snippet.optString("defaultLanguage", snippet.optString("defaultAudioLanguage", ""));
|
|
if(langCode.length() > 3) {
|
|
langCode = langCode.substring(0, 3);
|
|
}
|
|
if(langCode.endsWith("-")) {
|
|
langCode = langCode.substring(0, 2);
|
|
}
|
|
|
|
return new Crawler.Video(vdid, title, channel, tags, duration, langCode, category, published, live);
|
|
}
|
|
|
|
private long getDate(String format) {
|
|
try {
|
|
Date d = dateformat.parse(format.substring(0, 19).replace('T', ' '));
|
|
return d.getTime() / 1000;
|
|
} catch (ParseException e) {
|
|
e.printStackTrace();
|
|
System.err.println("Failed to parse date: " + format);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
private long getDuration(String iso8601) {
|
|
Duration dur = durationfactory.newDuration(iso8601);
|
|
return dur.getTimeInMillis(new Date(0)) / 1000;
|
|
}
|
|
|
|
public BufferedReader connect(String url) {
|
|
if(url == null)
|
|
return null;
|
|
try {
|
|
URL urll = new URL(url);
|
|
//System.out.println("url: " + urll.toString());
|
|
HttpURLConnection con = (HttpURLConnection) urll.openConnection();
|
|
con.connect();
|
|
//System.out.println(con.getResponseCode());
|
|
return new BufferedReader(new InputStreamReader(con.getInputStream()));
|
|
} catch(IOException e) {
|
|
e.printStackTrace();
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public JSONObject parse(BufferedReader in) {
|
|
if(in == null)
|
|
return null;
|
|
return new JSONObject(new JSONTokener(in));
|
|
}
|
|
} |