diff --git a/src/de/mrbesen/youtubecrawler/CrawlerThread.java b/src/de/mrbesen/youtubecrawler/CrawlerThread.java index 50f4233..b1ec906 100644 --- a/src/de/mrbesen/youtubecrawler/CrawlerThread.java +++ b/src/de/mrbesen/youtubecrawler/CrawlerThread.java @@ -9,7 +9,7 @@ import org.apache.log4j.Logger; public class CrawlerThread implements Runnable { - private static Pattern linkpattern = Pattern.compile("href=\"\\/watch\\?v="); + private static Pattern linkpattern = Pattern.compile("watch\\?v=([-_a-zA-Z0-9]{11})"); private Logger log = Logger.getLogger(this.getClass().getName()); private Crawler parent; @@ -90,21 +90,13 @@ public class CrawlerThread implements Runnable { crawled.add(videoid); // log.info("crawling: " + videoid); - HTTPS con = new HTTPS("https://youtube.com/watch?v=" + videoid); + HTTPS con = new HTTPS("https://youtube.com/watch?v=" + videoid); String s = con.getContent(); Matcher matcher = linkpattern.matcher(s); while(matcher.find()) { - int beginytid = matcher.end(); - int endxtid = s.indexOf('"', beginytid); - int endid = s.indexOf('&', beginytid); - if(endid < endxtid) { - endxtid = endid; - } - String ytid = s.substring(beginytid, endxtid); - if(ytid.length() > 9 && ytid.length() <= 12) { + String ytid = matcher.group(1); + if(!ytid.equals(videoid)) { found.add(ytid); - } else { -// log.warn("youtube id has wrong length: \"" + ytid + "\""); } } } catch(IOException e) {