YoutubeCrawler/src/main/de/mrbesen/youtubecrawler/YoutubeAPI.java

163 lines
4.9 KiB
Java

package de.mrbesen.youtubecrawler;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.OffsetDateTime;
import java.time.format.DateTimeParseException;
import java.util.*;
import java.util.stream.Collectors;
import javax.net.ssl.HttpsURLConnection;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.Duration;
import org.apache.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONObject;
import org.json.JSONTokener;
/**
 * Small client for the YouTube Data API v3.
 *
 * <p>It issues HTTPS GET requests against {@code https://www.googleapis.com/youtube/v3/},
 * parses the JSON answers and converts video items into {@link Crawler.Video} objects.
 * Network failures are logged and reported to callers as {@code null} / empty results
 * rather than as exceptions.
 */
public class YoutubeAPI {

    /** Common prefix of every request URL built by this class. */
    private static final String BASEQUERY = "https://www.googleapis.com/youtube/v3/";

    private final Logger log = Logger.getLogger(YoutubeAPI.class.getName());

    /** API key appended as {@code &key=...} to every request. */
    private final String apiKey;

    /** Parses the ISO-8601 duration strings found in {@code contentDetails.duration}. */
    private final DatatypeFactory durationfactory;

    /**
     * Creates a client that authenticates with the given API key.
     *
     * @param apikey the key sent with every request
     * @throws IllegalStateException if the JVM provides no {@link DatatypeFactory}
     *         implementation (previously this terminated the JVM via {@code System.exit(1)})
     */
    public YoutubeAPI(String apikey) {
        apiKey = apikey;
        try {
            durationfactory = DatatypeFactory.newInstance();
        } catch (DatatypeConfigurationException e) {
            throw new IllegalStateException("No DatatypeFactory implementation available", e);
        }
    }

    /**
     * Looks up a single video.
     *
     * @param id the video id to query
     * @return the first video of the answer, or {@code null} when the request failed or
     *         returned no item
     */
    public Crawler.Video getInfo(String id) {
        List<Crawler.Video> found = getInfos(id)[0];
        return found.isEmpty() ? null : found.get(0);
    }

    /**
     * Looks up several videos at once.
     *
     * <p><b>Side effect:</b> the passed list is emptied, exactly as the previous
     * implementation did by repeatedly removing its first element.
     *
     * @param ids video ids; must be a modifiable list
     * @return {@code null} if {@code ids} is empty, otherwise the result of
     *         {@link #getInfos(String)} for the comma-joined ids
     */
    public List<Crawler.Video>[] getInfos(List<String> ids) {
        if (ids.isEmpty()) {
            return null;
        }
        // Join in one pass instead of remove(0) in a loop (quadratic on an ArrayList),
        // then clear() to keep the "list is consumed" behaviour callers may rely on.
        String idlist = String.join(",", ids);
        ids.clear();
        return getInfos(idlist);
    }

    /**
     * Fetches the video categories for region code {@code us}.
     *
     * <p>Every accepted category is also printed to standard output as {@code id;name}.
     *
     * @return category names keyed by numeric id, sorted by id; empty when the request
     *         failed or the answer carries no {@code items} array
     */
    public Map<Integer, String> getCategories() {
        Map<Integer, String> out = new TreeMap<>();
        JSONObject obj = fetchJson(BASEQUERY + "videoCategories?part=snippet&regionCode=us&key=" + apiKey);
        JSONArray items = obj == null ? null : obj.optJSONArray("items");
        if (items == null) {
            return out;
        }
        for (int i = 0; i < items.length(); ++i) {
            JSONObject item = items.optJSONObject(i);
            if (item == null) {
                continue;
            }
            String id = item.getString("id");
            String name = item.getJSONObject("snippet").getString("title");
            try {
                int intid = Integer.parseInt(id);
                out.put(intid, name);
                // Kept from the original implementation; callers may consume this output.
                System.out.println(intid + ";" + name);
            } catch (NumberFormatException e) {
                log.warn("Skipping video category with non-numeric id: " + id, e);
            }
        }
        return out;
    }

    /**
     * Queries {@code snippet} and {@code contentDetails} for a comma-separated id list,
     * following {@code nextPageToken} until the answer contains none.
     *
     * @param idlist comma-separated video ids; {@code null} or empty yields empty lists
     *        without issuing a request
     * @return a two-element array: index 0 holds every parsed video, index 1 is a second
     *         list that this implementation never adds to
     */
    public List<Crawler.Video>[] getInfos(String idlist) {
        // A video id plus separator is roughly 12 characters; used only to presize the list.
        ArrayList<Crawler.Video> out = new ArrayList<>(idlist == null ? 0 : idlist.length() / 12);
        // NOTE(review): looks like this was meant to receive live streams, but nothing
        // is ever added to it - confirm with the callers before changing that.
        LinkedList<Crawler.Video> livestr = new LinkedList<>();

        if (idlist != null && !idlist.isEmpty()) {
            String nextpage = "";
            do {
                String query = BASEQUERY + "videos?part=snippet,contentDetails&id=" + idlist + nextpage + "&key=" + apiKey;
                JSONObject json = fetchJson(query);
                nextpage = "";
                JSONArray items = json == null ? null : json.optJSONArray("items");
                if (items != null) {
                    for (int i = 0; i < items.length(); ++i) {
                        out.add(getVid(items.getJSONObject(i)));
                    }
                    if (json.has("nextPageToken")) {
                        nextpage = "&pageToken=" + json.getString("nextPageToken");
                    }
                }
            } while (!nextpage.isEmpty());
        }

        // Generic array creation is impossible in Java; both elements are List<Crawler.Video>.
        @SuppressWarnings("unchecked")
        List<Crawler.Video>[] result = new List[] {out, livestr};
        return result;
    }

    /**
     * Converts one {@code items[]} entry of a {@code videos} answer into a video object.
     *
     * <p>{@code tags}, {@code defaultLanguage}, {@code categoryId},
     * {@code liveBroadcastContent} and {@code contentDetails.duration} are treated as
     * optional and fall back to an empty string, category 0, "not live" and duration 0.
     * Reading them with the throwing accessors made a single video without tags or
     * language abort the whole request.
     *
     * @param json the item object
     * @return the converted video
     */
    private Crawler.Video getVid(JSONObject json) {
        String vdid = json.getString("id");
        JSONObject snippet = json.getJSONObject("snippet");
        String title = snippet.getString("title"); // original author's note: max length 100
        long published = getDate(snippet.getString("publishedAt"));
        String channel = snippet.getString("channelTitle");

        // original author's note: max length ~500
        JSONArray tagArray = snippet.optJSONArray("tags");
        String tags = tagArray == null
                ? ""
                : tagArray.toList().stream().map(String::valueOf).collect(Collectors.joining(", "));

        byte category = 0;
        try {
            category = Byte.parseByte(snippet.optString("categoryId"));
        } catch (NumberFormatException ignored) {
            // Missing or non-numeric category: keep 0.
        }

        JSONObject contentDetails = json.optJSONObject("contentDetails");
        String isoDuration = contentDetails == null ? "" : contentDetails.optString("duration");
        int duration = (int) getDuration(isoDuration);

        boolean live = !"none".equalsIgnoreCase(snippet.optString("liveBroadcastContent", "none"));

        // Keep at most three characters and drop a dangling dash, e.g. "en-US" -> "en".
        String langCode = snippet.optString("defaultLanguage", "");
        if (langCode.length() > 3) {
            langCode = langCode.substring(0, 3);
        }
        if (langCode.endsWith("-")) {
            langCode = langCode.substring(0, langCode.length() - 1);
        }

        return new Crawler.Video(vdid, title, channel, tags, duration, langCode, category, published, live);
    }

    /**
     * Converts an ISO-8601 timestamp with offset (e.g. {@code 2020-05-04T17:00:12Z}) into
     * seconds since the Unix epoch.
     *
     * <p>The offset in the string is honoured. The former implementation cut the string
     * after the seconds and parsed it in the JVM default time zone, which shifted the
     * result by the local UTC offset.
     *
     * @param format the timestamp text
     * @return epoch seconds, or 0 if the text cannot be parsed (the failure is logged)
     */
    private long getDate(String format) {
        try {
            return OffsetDateTime.parse(format).toEpochSecond();
        } catch (DateTimeParseException e) {
            log.error("Failed to parse date: " + format, e);
            return 0;
        }
    }

    /**
     * Converts an ISO-8601 duration such as {@code PT1H2M3S} into whole seconds.
     *
     * @param iso8601 the duration text
     * @return the length in seconds, or 0 if the text is {@code null}, empty or not a
     *         valid duration (invalid text is logged)
     */
    private long getDuration(String iso8601) {
        if (iso8601 == null || iso8601.isEmpty()) {
            return 0;
        }
        try {
            Duration dur = durationfactory.newDuration(iso8601);
            return dur.getTimeInMillis(new Date(0)) / 1000;
        } catch (IllegalArgumentException e) {
            log.warn("Failed to parse duration: " + iso8601, e);
            return 0;
        }
    }

    /**
     * Opens an HTTPS connection to the given URL and returns a reader over the response
     * body, decoded as UTF-8.
     *
     * <p>The caller owns the returned reader and must close it. No connect or read
     * timeout is configured.
     *
     * @param url an {@code https} URL
     * @return a reader over the response body, or {@code null} if an
     *         {@link IOException} occurred (it is logged)
     */
    public BufferedReader connect(String url) {
        try {
            HttpsURLConnection con = (HttpsURLConnection) new URL(url).openConnection();
            con.connect();
            // Decode explicitly as UTF-8 instead of the platform default charset.
            return new BufferedReader(new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8));
        } catch (IOException e) {
            log.error("Request to the YouTube API failed", e);
        }
        return null;
    }

    /**
     * Parses one JSON object from the reader. The reader is not closed.
     *
     * @param in the source, may be {@code null}
     * @return the parsed object, or {@code null} if {@code in} is {@code null}
     */
    public JSONObject parse(BufferedReader in) {
        if (in == null) {
            return null;
        }
        return new JSONObject(new JSONTokener(in));
    }

    /** Requests the URL, parses the JSON answer and always closes the response reader. */
    private JSONObject fetchJson(String query) {
        BufferedReader in = connect(query);
        if (in == null) {
            return null;
        }
        try {
            return parse(in);
        } finally {
            try {
                in.close();
            } catch (IOException e) {
                log.warn("Could not close the API response stream", e);
            }
        }
    }
}