98 lines
3.0 KiB
C++
98 lines
3.0 KiB
C++
#include "de_mrbesen_youtubecrawler_CrawlerThread.h"
|
|
|
|
#include <iostream>
|
|
#include <string>
|
|
#include <sstream>
|
|
#include <regex>
|
|
#include <vector>
|
|
|
|
#include <curl/curl.h>
|
|
|
|
// Prefix of a YouTube watch-page URL; crawl() appends the video id to it.
static const std::string YOUTUBEBASE = "https://youtube.com/watch?v=";
// Matches "watch?v=<id>" links; capture group 1 is the 11-character video id.
static const std::regex YOUTUBELINKPATTERN("watch\\?v=([-_a-zA-Z0-9]{11})");
|
|
|
|
// libcurl easy handles: filled by initLib (one per requested thread), indexed by
// the threadid passed to crawl, and released again by deinitLib.
std::vector<CURL*> curls;
|
|
|
|
/**
 * libcurl write callback: appends the received chunk to the output stream
 * that was registered through CURLOPT_WRITEDATA.
 *
 * @param contents pointer to the received bytes (may contain NUL bytes)
 * @param size     size of one element in bytes
 * @param nmemb    number of elements in the chunk
 * @param userp    pointer to the std::ostringstream collecting the response
 * @return the number of bytes handled, always size * nmemb
 */
static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* userp) {
	const size_t bytes = size * nmemb;
	// write() appends the raw bytes directly instead of copying them into a temporary std::string first
	static_cast<std::ostringstream*>(userp)->write(static_cast<const char*>(contents), static_cast<std::streamsize>(bytes));
	return bytes;
}
|
|
|
|
/**
 * Native part of CrawlerThread.initLib: creates `threadcount` libcurl easy
 * handles, configures them to follow redirects and to deliver data through
 * WriteCallback, and appends them to `curls`.
 *
 * @param threadcount number of handles to create; values <= 0 create nothing
 */
JNIEXPORT void JNICALL Java_de_mrbesen_youtubecrawler_CrawlerThread_initLib(JNIEnv* /*env*/, jclass /*clazz*/, jint threadcount) {
	if(threadcount <= 0) {
		// a negative count would be converted to a huge unsigned value by reserve(),
		// which throws; a C++ exception must not escape into the JVM
		return;
	}

	curls.reserve(curls.size() + static_cast<std::vector<CURL*>::size_type>(threadcount));
	for(jint i = 0; i < threadcount; ++i) {
		CURL* curl = curl_easy_init();
		if(curl == nullptr) {
			std::cout << "Curl error: could not create handle " << i << std::endl;
		} else {
			// curl_easy_setopt is variadic and reads this option as a long, so pass 1L rather than an int
			curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
			curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
		}
		// keep the slot even when creation failed so the position of later handles is unchanged
		curls.push_back(curl);
	}
}
|
|
|
|
/**
 * Native part of CrawlerThread.deinitLib: releases every libcurl handle stored
 * in `curls` and empties the vector.
 */
JNIEXPORT void JNICALL Java_de_mrbesen_youtubecrawler_CrawlerThread_deinitLib(JNIEnv* /*env*/, jclass) {
	// range-for avoids the signed/unsigned comparison of an int index against size()
	for(CURL* curl : curls) {
		curl_easy_cleanup(curl);
	}
	curls.clear();
}
|
|
|
|
/**
 * Downloads `url` with the given libcurl handle and returns the response body.
 *
 * @param curl handle to use for the transfer; the body is collected through
 *             the CURLOPT_WRITEDATA stream set here
 * @param url  address to fetch
 * @return the response body, or an empty string when the handle is null, the
 *         transfer fails, or the HTTP response code is not 200
 */
std::string download(CURL* curl, const std::string& url) {
	if(curl == nullptr) {
		std::cout << "Curl error: no curl handle" << std::endl;
		return "";
	}

	std::ostringstream out;
	curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
	curl_easy_setopt(curl, CURLOPT_WRITEDATA, &out);

	const CURLcode res = curl_easy_perform(curl);
	// check the transfer result first: when the transfer itself failed, the
	// response code says nothing useful and would hide the real error
	if(res != CURLE_OK) {
		std::cout << "Curl error: " << res << " (" << curl_easy_strerror(res) << ")" << std::endl;
		return "";
	}

	long responsecode = 404;
	curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &responsecode);
	if(responsecode != 200) {
		std::cout << "Curl error: got " << responsecode << std::endl;
		return "";
	}

	return out.str();
}
|
|
|
|
/**
 * Native part of CrawlerThread.crawl: downloads the watch page of `videoid`,
 * extracts every video id linked from it and adds each one (except `videoid`
 * itself) to the `found` LinkedList field of the calling object.
 *
 * @param env      JNI environment of the calling thread
 * @param that     the CrawlerThread instance whose `found` list is filled
 * @param videoid  id of the video whose page is crawled
 * @param threadid index into `curls` selecting the handle to use
 *
 * Returns early without adding anything when an argument is invalid, the
 * download yields no content, or a JNI lookup fails (in which case the Java
 * exception raised by JNI stays pending for the caller).
 */
JNIEXPORT void JNICALL Java_de_mrbesen_youtubecrawler_CrawlerThread_crawl(JNIEnv* env, jobject that, jstring videoid, jint threadid) {
	// get videoid argument
	if(videoid == nullptr) {
		std::cout << "crawl: videoid is null" << std::endl;
		return;
	}
	const char* cvideid = env->GetStringUTFChars(videoid, nullptr);
	if(cvideid == nullptr) {
		// the JVM could not provide the characters; constructing a std::string from null would be undefined
		return;
	}
	const std::string svideoid(cvideid);
	env->ReleaseStringUTFChars(videoid, cvideid);

	// validate the index by hand: vector::at() would throw, and a C++ exception must not escape into the JVM
	if(threadid < 0 || static_cast<std::vector<CURL*>::size_type>(threadid) >= curls.size()) {
		std::cout << "crawl: invalid threadid " << threadid << std::endl;
		return;
	}

	// use curl to get the website
	CURL* curl = curls[static_cast<std::vector<CURL*>::size_type>(threadid)];
	const std::string webcontent = download(curl, YOUTUBEBASE + svideoid);
	if(webcontent.empty()) {
		std::cout << "webcontent is empty" << std::endl;
		return; // nothing to search, so skip the JNI lookups
	}

	// resolve the Java side; each lookup returns null and leaves an exception pending on failure
	jclass jclass_ct = env->FindClass("de/mrbesen/youtubecrawler/CrawlerThread");
	if(jclass_ct == nullptr) {
		return;
	}
	jfieldID fid_ctfound = env->GetFieldID(jclass_ct, "found", "Ljava/util/LinkedList;");
	if(fid_ctfound == nullptr) {
		return;
	}
	jclass jclass_ll = env->FindClass("java/util/LinkedList");
	if(jclass_ll == nullptr) {
		return;
	}
	jmethodID mid_add = env->GetMethodID(jclass_ll, "add", "(Ljava/lang/Object;)Z");
	if(mid_add == nullptr) {
		return;
	}

	// read the list once instead of creating a new local reference per match
	jobject ll_found = env->GetObjectField(that, fid_ctfound);
	if(ll_found == nullptr) {
		std::cout << "crawl: found list is null" << std::endl;
		return;
	}

	// match regex
	const auto itend = std::sregex_iterator();
	for(auto it = std::sregex_iterator(webcontent.begin(), webcontent.end(), YOUTUBELINKPATTERN); it != itend; ++it) {
		const std::string ytid = (*it)[1].str();
		if(ytid == svideoid) {
			continue; // do not report the crawled video itself
		}

		// construct java string and add it to the found list
		jstring jytid = env->NewStringUTF(ytid.c_str());
		if(jytid == nullptr) {
			break; // string could not be created
		}
		env->CallBooleanMethod(ll_found, mid_add, jytid);
		// local references are otherwise only freed when this native method returns;
		// a page with many links would pile them up
		env->DeleteLocalRef(jytid);
		if(env->ExceptionCheck()) {
			break; // add() threw; stop and let the caller see the exception
		}
	}
}
|
|
|