YoutubeCrawler/src/main/de/mrbesen/youtubecrawler/YoutubeAPI.java

178 lines
5.3 KiB
Java

package de.mrbesen.youtubecrawler;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.OffsetDateTime;
import java.time.format.DateTimeParseException;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.Duration;
import org.apache.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONObject;
import org.json.JSONTokener;
/**
 * Small client for the YouTube Data API v3 endpoints used by the crawler:
 * {@code videos} (snippet + contentDetails) and {@code videoCategories}.
 *
 * <p>Network and parse problems on the HTTP level are logged and surface as
 * {@code null} / empty results rather than as checked exceptions.
 */
public class YoutubeAPI {

    private static final String BASEQUERY = "https://www.googleapis.com/youtube/v3/";

    /** Shape of a video id as accepted by {@link #getInfos(Collection)}. */
    private static final Pattern VIDEO_ID = Pattern.compile("[a-zA-Z0-9_-]{11}");

    /** Upper bounds so a stalled connection cannot block the calling thread forever. */
    private static final int CONNECT_TIMEOUT_MS = 10_000;
    private static final int READ_TIMEOUT_MS = 30_000;

    private static final Logger log = Logger.getLogger(YoutubeAPI.class.getName());

    private final String api_key;
    private final DatatypeFactory durationfactory;

    /**
     * Creates a client that appends the given key to every request.
     *
     * @param apikey API key sent as the {@code key} query parameter
     * @throws IllegalStateException if no {@link DatatypeFactory} implementation is available
     */
    public YoutubeAPI(String apikey) {
        api_key = apikey;
        try {
            durationfactory = DatatypeFactory.newInstance();
        } catch (DatatypeConfigurationException e) {
            // Fail loudly but let the caller decide whether the process should end.
            throw new IllegalStateException("No DatatypeFactory available for duration parsing", e);
        }
    }

    /**
     * Fetches the metadata of a single video.
     *
     * @param id video id
     * @return the video, or {@code null} if the API returned no item for this id
     */
    public Crawler.Video getInfo(String id) {
        List<Crawler.Video>[] infos = getInfos(id);
        if (infos == null || infos.length == 0 || infos[0].isEmpty()) {
            return null;
        }
        return infos[0].get(0);
    }

    /**
     * Fetches the metadata of several videos. Ids that do not look like a
     * video id are logged and skipped.
     *
     * @param ids video ids
     * @return {@code null} if {@code ids} is empty; otherwise the same
     *         two-element array as {@link #getInfos(String)}
     */
    public List<Crawler.Video>[] getInfos(Collection<String> ids) {
        if (ids.isEmpty()) {
            return null;
        }
        StringJoiner joined = new StringJoiner(",");
        for (String id : ids) {
            if (id != null && VIDEO_ID.matcher(id).matches()) {
                joined.add(id);
            } else {
                log.warn("non matching id: \"" + id + "\"");
            }
        }
        // If nothing valid is left, getInfos(String) returns empty lists without a request.
        return getInfos(joined.toString());
    }

    /**
     * Fetches the video categories for region {@code us}.
     *
     * @return category id to title, sorted by id; empty if the request failed
     */
    public Map<Integer, String> getCategories() {
        String query = BASEQUERY + "videoCategories?part=snippet&regionCode=us&key=" + api_key;
        Map<Integer, String> out = new TreeMap<>();

        JSONObject obj = fetchJson(query);
        JSONArray items = (obj == null) ? null : obj.optJSONArray("items");
        if (items == null) {
            return out;
        }
        for (int i = 0; i < items.length(); ++i) {
            JSONObject item = items.optJSONObject(i);
            if (item == null) {
                continue; // skip null / non-object entries instead of stopping at the first one
            }
            String id = item.getString("id");
            String name = item.getJSONObject("snippet").getString("title");
            try {
                out.put(Integer.parseInt(id), name);
            } catch (NumberFormatException e) {
                log.warn("Skipping category with non-numeric id: \"" + id + "\"", e);
            }
        }
        return out;
    }

    /**
     * Fetches the metadata for a comma-separated list of video ids, following
     * {@code nextPageToken} until the response no longer carries one.
     *
     * <p>NOTE(review): the second list is never filled here; every parsed video,
     * live or not, goes into the first list. Confirm against callers whether
     * live broadcasts were meant to be separated.
     *
     * @param idlist comma-separated video ids, inserted into the URL as-is
     * @return array of two lists: index 0 holds all parsed videos, index 1 is always empty
     */
    public List<Crawler.Video>[] getInfos(String idlist) {
        LinkedList<Crawler.Video> livestr = new LinkedList<>();
        if (idlist == null || idlist.isEmpty()) {
            return pair(new ArrayList<Crawler.Video>(), livestr);
        }

        // approximate expected size: 11 id characters plus one separator per video
        ArrayList<Crawler.Video> out = new ArrayList<>(idlist.length() / 12);
        String nextpage = "";
        do {
            String query = BASEQUERY + "videos?part=snippet,contentDetails&id=" + idlist + nextpage + "&key=" + api_key;
            JSONObject json = fetchJson(query);
            nextpage = "";
            if (json != null && json.has("items")) {
                for (Object item : json.getJSONArray("items")) {
                    out.add(getVid((JSONObject) item));
                }
                if (json.has("nextPageToken")) {
                    nextpage = "&pageToken=" + json.getString("nextPageToken");
                }
            }
        } while (!nextpage.isEmpty());
        return pair(out, livestr);
    }

    /** Bundles the two result lists into the array shape the public methods return. */
    private static List<Crawler.Video>[] pair(List<Crawler.Video> videos, List<Crawler.Video> live) {
        // Generic array creation is not possible in Java; both elements are List<Crawler.Video>.
        @SuppressWarnings({"unchecked", "rawtypes"})
        List<Crawler.Video>[] result = new List[] {videos, live};
        return result;
    }

    /** Runs a GET request, parses the body as JSON and always closes the stream. */
    private JSONObject fetchJson(String query) {
        BufferedReader in = connect(query);
        if (in == null) {
            return null;
        }
        try {
            return parse(in);
        } finally {
            try {
                in.close();
            } catch (IOException e) {
                log.warn("Failed to close API response stream", e);
            }
        }
    }

    /**
     * Converts one element of the {@code items} array into a {@code Crawler.Video}.
     * {@code id} and {@code snippet} are required; every other field falls back
     * to an empty / zero value when absent.
     */
    private Crawler.Video getVid(JSONObject json) {
        String vdid = json.getString("id");
        JSONObject snippet = json.getJSONObject("snippet");
        String title = snippet.optString("title", ""); // maxlen: 100
        long published = getDate(snippet.optString("publishedAt", ""));
        String channel = snippet.optString("channelTitle", "");

        String tags = "";
        JSONArray tagArray = snippet.optJSONArray("tags");
        if (tagArray != null) {
            tags = tagArray.toList().stream().map(String::valueOf).collect(Collectors.joining(",")); // max len: ~500
        }

        byte category = 0;
        try {
            category = Byte.parseByte(snippet.optString("categoryId", ""));
        } catch (NumberFormatException ignored) {
            // missing or non-numeric category id: keep 0
        }

        JSONObject contentDetails = json.optJSONObject("contentDetails");
        int duration = (contentDetails == null) ? 0 : (int) getDuration(contentDetails.optString("duration", ""));
        boolean live = !snippet.optString("liveBroadcastContent", "none").equalsIgnoreCase("none");

        // Reduce tags such as "en-US" to at most three characters, then drop a trailing dash ("en-" -> "en").
        String langCode = snippet.optString("defaultLanguage", snippet.optString("defaultAudioLanguage", ""));
        if (langCode.length() > 3) {
            langCode = langCode.substring(0, 3);
        }
        if (langCode.endsWith("-")) {
            langCode = langCode.substring(0, 2);
        }
        return new Crawler.Video(vdid, title, channel, tags, duration, langCode, category, published, live);
    }

    /**
     * Converts an ISO 8601 timestamp with offset (e.g. {@code 2020-01-31T12:00:00Z})
     * into epoch seconds. The offset in the string is honoured, so the result
     * does not depend on the JVM's default time zone.
     *
     * @return epoch seconds, or 0 if the value is empty or cannot be parsed
     */
    private long getDate(String format) {
        if (format == null || format.isEmpty()) {
            return 0;
        }
        try {
            return OffsetDateTime.parse(format).toEpochSecond();
        } catch (DateTimeParseException e) {
            log.warn("Failed to parse date: " + format, e);
            return 0;
        }
    }

    /**
     * Converts an ISO 8601 duration (e.g. {@code PT4M13S}) into whole seconds.
     *
     * @return seconds, or 0 if the value is empty or cannot be parsed
     */
    private long getDuration(String iso8601) {
        if (iso8601 == null || iso8601.isEmpty()) {
            return 0;
        }
        try {
            Duration dur = durationfactory.newDuration(iso8601);
            return dur.getTimeInMillis(new Date(0)) / 1000;
        } catch (IllegalArgumentException | UnsupportedOperationException e) {
            log.warn("Failed to parse duration: " + iso8601, e);
            return 0;
        }
    }

    /**
     * Opens an HTTP GET connection and returns a UTF-8 reader on the response body.
     * The caller owns the returned reader and must close it.
     *
     * @param url request URL; {@code null} yields {@code null}
     * @return reader on the response body, or {@code null} if the request failed
     */
    public BufferedReader connect(String url) {
        if (url == null) {
            return null;
        }
        try {
            HttpURLConnection con = (HttpURLConnection) new URL(url).openConnection();
            con.setConnectTimeout(CONNECT_TIMEOUT_MS);
            con.setReadTimeout(READ_TIMEOUT_MS);
            con.connect();
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            return new BufferedReader(new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8));
        } catch (IOException e) {
            log.error("HTTP request failed", e);
        }
        return null;
    }

    /**
     * Parses a JSON object from the reader. The reader is not closed here.
     *
     * @param in source to read from; {@code null} yields {@code null}
     * @return the parsed object, or {@code null} if {@code in} is {@code null}
     */
    public JSONObject parse(BufferedReader in) {
        if (in == null) {
            return null;
        }
        return new JSONObject(new JSONTokener(in));
    }
}