From f1a4442a538acefc9f5f515568aa6fa99b6af283 Mon Sep 17 00:00:00 2001
From: mrbesen
Date: Mon, 16 Dec 2019 10:17:13 +0100
Subject: [PATCH] added postillon, added createchroot

---
Review note: the script bodies below were revised after the original commit,
so the blob ids on the "index" lines no longer match the file contents.

 createchroot.sh         | 23 +++++++++++++
 postillon-new-ticker.sh | 76 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100755 createchroot.sh
 create mode 100755 postillon-new-ticker.sh

diff --git a/createchroot.sh b/createchroot.sh
new file mode 100755
index 0000000..e5962e2
--- /dev/null
+++ b/createchroot.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Build a chroot directory from the host's binaries and libraries and open
+# an interactive shell inside it (the last step needs sudo).
+# The directory is created relative to the current working directory.
+
+chroot="chroot2"
+
+# -p: an already existing directory from an earlier run is not an error.
+mkdir -p -- "$chroot" || exit 1
+# Stop if the directory cannot be entered; otherwise the copies below would
+# be written into the caller's working directory.
+cd -- "$chroot" || exit 1
+
+# Host binaries and shared libraries; a path missing on this host only
+# produces a cp error and the remaining copies still run.
+cp -r /bin/ .
+cp -r /lib/ .
+cp -r /lib64/ .
+
+mkdir -p usr
+cp -r /usr/bin ./usr/
+
+sudo chroot ./ /bin/bash
diff --git a/postillon-new-ticker.sh b/postillon-new-ticker.sh
new file mode 100755
index 0000000..122e279
--- /dev/null
+++ b/postillon-new-ticker.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+# Collect the "+++" ticker lines from the Newsticker articles on
+# der-postillon.com.
+#
+# Crawls the Newsticker label pages for article links, downloads the
+# articles into the current directory, appends every matching line to
+# postillon.txt and finally sorts and de-duplicates that file.
+# Needs curl, wget and GNU grep (-P).
+
+pages="links.txt"
+output="postillon.txt"
+
+#get links
+
+#nice way with crawling
+link="https://www.der-postillon.com/search/label/Newsticker"
+append=""
+# Start from an empty link list; creating it here keeps the later sort, wc
+# and wget steps working even when no link is found.
+: > "$pages"
+for i in {1..100}; do
+  echo "get: $link$append"
+  page=$(curl -s "$link$append")
+  # cut drops the leading "href='" (6 characters) from every match.
+  printf '%s\n' "$page" | grep -Po "href='https.*\\.html" | cut -c 7- | uniq >> "$pages"
+  # Query string of the next page: among the candidates without "&", take
+  # the one with the lowest adjacent-repeat count. The count prefix is
+  # stripped with sed so counts of any width are handled.
+  append=$(printf '%s\n' "$page" \
+    | grep -Po "\\?updated-max.*?by-date=false" \
+    | grep -v "&" \
+    | uniq -c \
+    | sort -n \
+    | head -n 1 \
+    | sed 's/^ *[0-9]* //')
+  # A page was fetched but offers no next page: stop here instead of
+  # starting over at the first page.
+  if [[ -n "$page" && -z "$append" ]]; then
+    break
+  fi
+  sleep 1 # dont ban me pls
+done
+
+# lame way with rss
+# NOTE(review): the -Po pattern below looks truncated; "cut -c 7-" suggests
+# it once matched <link>...</link> - confirm before re-enabling.
+#curl http://feeds.feedburner.com/blogspot/rkEL -s | sed 's/item>/item>\n/g' | grep "Newsticker" | grep -Po ".*?" | cut -c 7-86 > $pages
+
+#remove duplicates
+sort -u -o "$pages" -- "$pages"
+echo "page count:"
+wc -l "$pages"
+
+#download pages
+wget -nc -nv -w 1 -i "$pages"
+
+# Make sure the result file exists even when nothing matches below.
+touch -- "$output"
+for f in newsticker-*.html; do
+  # An unmatched glob stays literal; skip it instead of reading a missing file.
+  [[ -e "$f" ]] || continue
+  echo "check file: $f"
+  # Keep lines containing "+++", drop markup/script lines, then strip the
+  # line-break tags and the "+++" markers themselves.
+  # NOTE(review): the tag in the first sed expression was missing from the
+  # reviewed copy; <br> in its usual spellings is assumed - confirm.
+  grep -F -- "+++" "$f" \
+    | grep -vF -e "_Widget" -e "meta" -e "var" -e "'/>" \
+    | sed -e 's#<br[ /]*>##g' -e 's#+++##g' >> "$output"
+done
+
+# make them uniq
+sort -u -o "$output" -- "$output"
+
+#remove temp
+rm -f -- "$pages" newsticker-*.html