package de.mrbesen.youtubecrawler;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeParseException;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.Duration;

import org.apache.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONObject;
import org.json.JSONTokener;

/**
 * Thin client for the YouTube Data API v3.
 *
 * <p>Builds request URLs against {@code https://www.googleapis.com/youtube/v3/}, fetches them
 * over {@link HttpURLConnection}, and converts the JSON answers into {@link Crawler.Video}
 * objects or a category-id to category-name map.
 */
public class YoutubeAPI {

    /** Base URL every request is appended to. */
    private static final String BASEQUERY = "https://www.googleapis.com/youtube/v3/";

    /** Shape of a video id accepted by {@link #getInfos(Collection)}; compiled once. */
    private static final Pattern VIDEO_ID = Pattern.compile("[a-zA-Z0-9_-]{11}");

    /** Number of leading characters of a timestamp that hold {@code yyyy-MM-ddTHH:mm:ss}. */
    private static final int DATE_PREFIX_LENGTH = 19;

    private static final Logger log = Logger.getLogger(YoutubeAPI.class.getName());

    private final String api_key;
    private DatatypeFactory durationfactory = null;

    /**
     * Creates a client that authenticates with the given API key.
     *
     * <p>If no {@link DatatypeFactory} can be created the failure is logged and the JVM is
     * terminated with exit code 1.
     *
     * @param apikey key appended as the {@code key} parameter of every request
     */
    public YoutubeAPI(String apikey) {
        api_key = apikey;
        try {
            durationfactory = DatatypeFactory.newInstance();
        } catch (DatatypeConfigurationException e) {
            log.fatal("Could not create DatatypeFactory for duration parsing", e);
            System.exit(1);
        }
    }

    /**
     * Fetches the metadata of a single video.
     *
     * @param id video id (or comma separated id list; only the first result is returned)
     * @return the first video of the answer, or {@code null} if the answer contained no video
     */
    public Crawler.Video getInfo(String id) {
        List<Crawler.Video> videos = getInfos(id)[0];
        return videos.isEmpty() ? null : videos.get(0);
    }

    /**
     * Fetches the metadata of several videos at once.
     *
     * <p>Ids that are {@code null} or do not look like a video id (11 characters out of
     * {@code [a-zA-Z0-9_-]}) are logged and skipped.
     *
     * @param ids video ids to look up
     * @return {@code null} if {@code ids} is {@code null} or empty, otherwise the result of
     *         {@link #getInfos(String)} for the valid ids
     */
    public List<Crawler.Video>[] getInfos(Collection<String> ids) {
        if (ids == null || ids.isEmpty()) {
            return null;
        }

        StringBuilder idList = new StringBuilder();
        for (String id : ids) {
            if (id != null && VIDEO_ID.matcher(id).matches()) {
                if (idList.length() > 0) {
                    idList.append(',');
                }
                idList.append(id);
            } else {
                log.warn("non matching id: \"" + id + "\"");
            }
        }
        return getInfos(idList.toString());
    }

    /**
     * Fetches the video categories for region {@code us}.
     *
     * @return map from numeric category id to category title, sorted by id; empty if the
     *         request failed
     */
    public Map<Integer, String> getCategories() {
        String query = BASEQUERY + "videoCategories?part=snippet&regionCode=us&key=" + api_key;
        JSONObject answer = fetchJson(query);

        Map<Integer, String> out = new TreeMap<>();
        JSONArray items = (answer == null) ? null : answer.optJSONArray("items");
        if (items == null) {
            return out;
        }

        for (int i = 0; i < items.length(); ++i) {
            // A null entry ended the iteration in the original loop as well.
            if (items.isNull(i)) {
                break;
            }
            JSONObject item = items.getJSONObject(i);
            String id = item.getString("id");
            String name = item.getJSONObject("snippet").getString("title");
            try {
                out.put(Integer.parseInt(id), name);
            } catch (NumberFormatException e) {
                log.warn("Skipping category with non numeric id \"" + id + "\"", e);
            }
        }
        return out;
    }

    /**
     * Fetches the metadata ({@code snippet} and {@code contentDetails}) for a comma separated
     * list of video ids, following {@code nextPageToken} until the answer is exhausted.
     *
     * @param idlist comma separated video ids; {@code null} or empty performs no request
     * @return array of two lists: index 0 holds every video of the answer, index 1 is a second
     *         list that this method currently never fills
     */
    @SuppressWarnings("unchecked") // generic array creation: both elements are List<Crawler.Video>
    public List<Crawler.Video>[] getInfos(String idlist) {
        // NOTE(review): the second list is returned empty; presumably meant for live streams -
        // confirm against the callers before populating it.
        List<Crawler.Video> livestr = new LinkedList<>();
        if (idlist == null || idlist.isEmpty()) {
            // Nothing to ask for; avoid spending a request on an empty id filter.
            return new List[] {new ArrayList<Crawler.Video>(), livestr};
        }

        // Approximate expected result size: 11 id characters plus one comma per video.
        List<Crawler.Video> out = new ArrayList<>(idlist.length() / 12);
        String nextpage = "";
        do {
            String query = BASEQUERY + "videos?part=snippet,contentDetails&id=" + idlist + nextpage
                    + "&key=" + api_key;
            JSONObject json = fetchJson(query);
            nextpage = "";
            if (json != null && json.has("items")) {
                for (Object item : json.getJSONArray("items")) {
                    out.add(getVid((JSONObject) item));
                }
                if (json.has("nextPageToken")) {
                    nextpage = "&pageToken=" + json.getString("nextPageToken");
                }
            }
        } while (!nextpage.isEmpty());

        return new List[] {out, livestr};
    }

    /**
     * Converts one element of the {@code items} array of a {@code videos} answer.
     *
     * @param json video resource containing {@code id}, {@code snippet} and
     *             {@code contentDetails}
     * @return the video described by {@code json}; optional fields that are missing or
     *         unparseable fall back to an empty string or {@code 0}
     */
    private Crawler.Video getVid(JSONObject json) {
        String vdid = json.getString("id");
        JSONObject snippet = json.getJSONObject("snippet");

        String title = snippet.optString("title", ""); // max length: 100
        long published = getDate(snippet.optString("publishedAt", ""));
        String channel = snippet.optString("channelTitle", "");

        String tags = ""; // max length: ~500
        JSONArray tagArray = snippet.optJSONArray("tags");
        if (tagArray != null) {
            tags = tagArray.toList().stream()
                    .map(tag -> String.valueOf(tag))
                    .collect(Collectors.joining(","));
        }

        byte category = 0;
        String categoryId = snippet.optString("categoryId", "");
        if (!categoryId.isEmpty()) {
            try {
                category = Byte.parseByte(categoryId);
            } catch (NumberFormatException e) {
                log.warn("Unparseable categoryId \"" + categoryId + "\" for video " + vdid, e);
            }
        }

        JSONObject contentDetails = json.optJSONObject("contentDetails");
        int duration = 0;
        if (contentDetails != null) {
            duration = (int) getDuration(contentDetails.optString("duration", ""));
        }

        boolean live = !snippet.optString("liveBroadcastContent", "none").equalsIgnoreCase("none");

        // Reduce tags such as "en-US" to at most three characters and drop a trailing dash.
        String langCode = snippet.optString("defaultLanguage",
                snippet.optString("defaultAudioLanguage", ""));
        if (langCode.length() > 3) {
            langCode = langCode.substring(0, 3);
        }
        if (langCode.endsWith("-")) {
            langCode = langCode.substring(0, 2);
        }

        return new Crawler.Video(vdid, title, channel, tags, duration, langCode, category,
                published, live);
    }

    /**
     * Converts a timestamp starting with {@code yyyy-MM-ddTHH:mm:ss} into epoch seconds.
     *
     * <p>Only the first 19 characters are used and they are interpreted as UTC; any fraction or
     * zone suffix is ignored.
     *
     * @param format timestamp as delivered by the API
     * @return seconds since the epoch, or {@code 0} if the value is missing or unparseable
     */
    private long getDate(String format) {
        if (format == null || format.length() < DATE_PREFIX_LENGTH) {
            if (format != null && !format.isEmpty()) {
                log.warn("Failed to parse date: " + format);
            }
            return 0;
        }
        try {
            return LocalDateTime.parse(format.substring(0, DATE_PREFIX_LENGTH))
                    .toEpochSecond(ZoneOffset.UTC);
        } catch (DateTimeParseException e) {
            log.warn("Failed to parse date: " + format, e);
            return 0;
        }
    }

    /**
     * Converts an ISO 8601 duration such as {@code PT4M13S} into seconds.
     *
     * @param iso8601 lexical duration
     * @return length in seconds, or {@code 0} if the value is missing or unparseable
     */
    private long getDuration(String iso8601) {
        if (iso8601 == null || iso8601.isEmpty()) {
            return 0;
        }
        try {
            Duration dur = durationfactory.newDuration(iso8601);
            return dur.getTimeInMillis(new Date(0)) / 1000;
        } catch (IllegalArgumentException | UnsupportedOperationException e) {
            log.warn("Failed to parse duration: " + iso8601, e);
            return 0;
        }
    }

    /**
     * Opens a connection to {@code url} and returns a reader over the response body.
     *
     * <p>The caller is responsible for closing the returned reader.
     *
     * @param url address to fetch, may be {@code null}
     * @return UTF-8 reader over the response, or {@code null} if {@code url} is {@code null} or
     *         the request failed
     */
    public BufferedReader connect(String url) {
        if (url == null) {
            return null;
        }
        try {
            HttpURLConnection con = (HttpURLConnection) new URL(url).openConnection();
            con.connect();
            return new BufferedReader(
                    new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8));
        } catch (IOException e) {
            log.error("Request to the YouTube API failed", e);
        }
        return null;
    }

    /**
     * Parses the content of {@code in} as a JSON object. The reader is not closed.
     *
     * @param in source of the JSON text, may be {@code null}
     * @return the parsed object, or {@code null} if {@code in} is {@code null}
     */
    public JSONObject parse(BufferedReader in) {
        if (in == null) {
            return null;
        }
        return new JSONObject(new JSONTokener(in));
    }

    /** Fetches {@code query}, parses the answer and always closes the response reader. */
    private JSONObject fetchJson(String query) {
        BufferedReader reader = connect(query);
        if (reader == null) {
            return null;
        }
        try {
            return parse(reader);
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                log.warn("Could not close API response reader", e);
            }
        }
    }
}