#include "de_mrbesen_youtubecrawler_CrawlerThread.h"

#include <curl/curl.h>

#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <vector>

// Prefix that a video id is appended to in order to form the page URL.
static const std::string YOUTUBEBASE = "https://youtube.com/watch?v=";
// Captures the 11-character video id of every "watch?v=..." occurrence.
static const std::regex YOUTUBELINKPATTERN("watch\\?v=([-_a-zA-Z0-9]{11})");

// curl easy handles created by initLib(); crawl() selects one by its threadid
// argument. A slot is nullptr when curl_easy_init() failed for it.
std::vector<CURL*> curls;

/**
 * libcurl write callback: appends the received bytes to the
 * std::ostringstream passed through CURLOPT_WRITEDATA.
 *
 * @return the number of bytes consumed (always size * nmemb).
 */
static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* userp) {
    const size_t total = size * nmemb;
    auto* out = static_cast<std::ostringstream*>(userp);
    out->write(static_cast<const char*>(contents), static_cast<std::streamsize>(total));
    return total;
}

/**
 * Creates `threadcount` curl easy handles and appends them to `curls`.
 * Each handle follows redirects and writes through WriteCallback.
 * A non-positive `threadcount` is ignored.
 */
JNIEXPORT void JNICALL Java_de_mrbesen_youtubecrawler_CrawlerThread_initLib(JNIEnv*, jclass, jint threadcount) {
    // A negative count would convert to a huge size_t in reserve() and throw
    // through the JNI boundary.
    if(threadcount <= 0) {
        return;
    }

    curls.reserve(curls.size() + static_cast<size_t>(threadcount));
    for(jint i = 0; i < threadcount; ++i) {
        CURL* curl = curl_easy_init();
        if(curl != nullptr) {
            // curl_easy_setopt is variadic; this option expects a long.
            curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
            curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
        } else {
            std::cout << "Curl error: could not create handle " << i << std::endl;
        }
        // Push even a failed (null) handle so indices keep matching thread ids.
        curls.push_back(curl);
    }
}

/**
 * Releases every curl handle created by initLib() and empties `curls`.
 */
JNIEXPORT void JNICALL Java_de_mrbesen_youtubecrawler_CrawlerThread_deinitLib(JNIEnv*, jclass) {
    for(CURL* curl : curls) {
        if(curl != nullptr) {
            curl_easy_cleanup(curl);
        }
    }
    curls.clear();
}

/**
 * Fetches `url` with the given curl handle.
 *
 * @param curl handle to use; a null handle is reported and treated as a failure.
 * @param url  address to request.
 * @return the response body, or an empty string when the transfer failed or
 *         the response code was not 200.
 */
std::string download(CURL* curl, const std::string& url) {
    if(curl == nullptr) {
        std::cout << "Curl error: no handle available" << std::endl;
        return "";
    }

    std::ostringstream out;
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &out);

    // Check the transfer result first: when the transfer itself failed the
    // response code is not meaningful.
    const CURLcode res = curl_easy_perform(curl);
    if(res != CURLE_OK) {
        std::cout << "Curl error: " << res << " (" << curl_easy_strerror(res) << ")" << std::endl;
        return "";
    }

    long responsecode = 404;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &responsecode);
    if(responsecode != 200) {
        std::cout << "Curl error: got " << responsecode << std::endl;
        return "";
    }

    return out.str();
}

/**
 * Downloads the watch page of `videoid` and adds every other video id found
 * on it to the `found` LinkedList field of `that`.
 *
 * @param env      JNI environment of the calling thread.
 * @param that     the CrawlerThread instance whose `found` list is filled.
 * @param videoid  id of the video whose page is crawled.
 * @param threadid index into `curls` selecting the handle to use.
 *
 * Returns early (leaving any pending Java exception in place) when an
 * argument is invalid, the download fails, or a JNI lookup fails.
 */
JNIEXPORT void JNICALL Java_de_mrbesen_youtubecrawler_CrawlerThread_crawl(JNIEnv* env, jobject that, jstring videoid, jint threadid) {
    if(videoid == nullptr) {
        return;
    }

    // copy the videoid argument into a std::string
    const char* cvideoid = env->GetStringUTFChars(videoid, nullptr);
    if(cvideoid == nullptr) {
        return; // GetStringUTFChars failed
    }
    const std::string svideoid(cvideoid);
    env->ReleaseStringUTFChars(videoid, cvideoid);

    // A C++ exception must not escape into the JVM, so check the index
    // explicitly instead of relying on vector::at().
    if(threadid < 0 || static_cast<size_t>(threadid) >= curls.size()) {
        std::cout << "invalid thread id: " << threadid << std::endl;
        return;
    }

    // use curl to get the website
    const std::string webcontent = download(curls[static_cast<size_t>(threadid)], YOUTUBEBASE + svideoid);
    if(webcontent.empty()) {
        std::cout << "webcontent is empty" << std::endl;
        return; // nothing to match against
    }

    jclass jclass_ct = env->FindClass("de/mrbesen/youtubecrawler/CrawlerThread");
    if(jclass_ct == nullptr) {
        return;
    }
    jfieldID fid_ctfound = env->GetFieldID(jclass_ct, "found", "Ljava/util/LinkedList;");
    if(fid_ctfound == nullptr) {
        return;
    }
    jclass jclass_ll = env->FindClass("java/util/LinkedList");
    if(jclass_ll == nullptr) {
        return;
    }
    jmethodID mid_add = env->GetMethodID(jclass_ll, "add", "(Ljava/lang/Object;)Z");
    if(mid_add == nullptr) {
        return;
    }

    // Read the list once; fetching it per match would create a new local
    // reference on every iteration.
    jobject ll_found = env->GetObjectField(that, fid_ctfound);
    if(ll_found == nullptr) {
        return;
    }

    // match regex
    const std::sregex_iterator itend;
    for(std::sregex_iterator it(webcontent.begin(), webcontent.end(), YOUTUBELINKPATTERN); it != itend; ++it) {
        const std::string ytid = (*it)[1].str();
        if(ytid == svideoid) {
            continue; // skip links back to the crawled video itself
        }

        // construct java string and add it to the found list
        jstring jytid = env->NewStringUTF(ytid.c_str());
        if(jytid == nullptr) {
            break; // NewStringUTF failed
        }
        env->CallBooleanMethod(ll_found, mid_add, jytid);
        // Release the per-match reference so a page with many links cannot
        // exhaust the local reference table.
        env->DeleteLocalRef(jytid);
        if(env->ExceptionCheck()) {
            break; // leave the Java exception pending for the caller
        }
    }

    env->DeleteLocalRef(ll_found);
}