added postillon, added createchroot
This commit is contained in:
parent
f775343ca1
commit
f1a4442a53
|
@ -0,0 +1,12 @@
|
|||
#!/bin/bash
#
# Build a small chroot directory from the host's binaries and libraries and
# start an interactive bash inside it.
#
# The directory ./chroot2 is created relative to the current working
# directory; /bin, /lib, /lib64 (if present) and /usr/bin are copied into it.
# sudo is used only for the final chroot call.

chroot="chroot2"

# -p: re-running against an already existing directory is not an error.
mkdir -p -- "$chroot" || exit 1

# Stop here if the target cannot be entered; otherwise the copies below would
# land in the caller's current directory and chroot would be run on it.
cd -- "$chroot" || exit 1

# The copies are best effort: a failing cp does not abort the script.
cp -r /bin/ .
cp -r /lib/ .
# /lib64 is not present on every system; skip it instead of failing in cp.
if [[ -e /lib64 ]]; then
  cp -r /lib64/ .
fi

mkdir -p usr
cp -r /usr/bin ./usr/

sudo chroot ./ /bin/bash
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
# Collect the "Newsticker" one-liners from der-postillon.com.
#
# Steps:
#   1. Crawl the Newsticker label pages and collect article links in $pages.
#   2. Download every linked article with wget.
#   3. Extract the lines containing "+++" from the downloaded files into
#      $output (sorted, duplicates removed).
#   4. Remove the link list and the downloaded HTML files.
#
# Needs bash (brace expansion), GNU grep (-P), curl and wget.
# All files are read and written in the current working directory.

pages="links.txt"
output="postillon.txt"

#get links

#nice way with crawling
link="https://www.der-postillon.com/search/label/Newsticker"
append=""
# -f: a missing list (first run) is not an error.
rm -f -- "$pages"
for _ in {1..100}; do
  echo "get: $link$append"
  page=$(curl -s "$link$append")

  # Article links have the form href='https...html'; cut drops the leading
  # "href='" (6 characters).
  printf '%s\n' "$page" \
    | grep -Po "href='https.*\\.html" \
    | cut -c 7- \
    | uniq >>"$pages"

  # Next-page query string: of all "?updated-max...by-date=false" candidates
  # without "&", take the one with the lowest adjacent-duplicate count.
  # sort -n orders by that count numerically; sed strips the count that
  # uniq -c prepends, whatever its width.
  append=$(printf '%s\n' "$page" \
    | grep -Po "\\?updated-max.*?by-date=false" \
    | grep -v "&" \
    | uniq -c \
    | sort -n \
    | head -n 1 \
    | sed -E 's/^[[:space:]]*[0-9]+[[:space:]]//')
  #echo "possibles: "
  #echo "$page" | grep -Po "\\?updated-max.*?by-date=false" | grep -v "&" | uniq -c | sort

  # No next-page link (last page reached or fetch failed): stop here instead
  # of requesting the first page again for the remaining iterations.
  if [[ -z "$append" ]]; then
    break
  fi

  sleep 1 # dont ban me pls
done

# lame way with rss
#curl http://feeds.feedburner.com/blogspot/rkEL -s | sed 's/item>/item>\n/g' | grep "Newsticker" | grep -Po "<link>.*?</link>" | cut -c 7-86 > $pages

#remove duplicates
# sort -o may safely name one of its inputs, so no scratch file is needed.
sort -u -o "$pages" -- "$pages"
echo "page count:"
wc -l "$pages"

#download pages
# -nc: keep files that already exist, -w 1: wait one second between requests.
wget -nc -nv -w 1 -i "$pages"

# Make sure the output file exists even when no article was downloaded.
: >>"$output"

# NOTE(review): assumes wget stored the articles as newsticker-*.html in the
# current directory; any such files already present are processed as well.
for f in newsticker-*.html; do
  # Without a match the glob stays literal; skip it instead of failing in grep.
  [[ -e "$f" ]] || continue
  echo "check file: $f"
  # Keep lines containing "+++", drop those that also contain one of the
  # listed markup fragments, then remove "<br />" and the "+++" markers.
  grep -F -- "+++" "$f" \
    | grep -vF -e "_Widget" -e "meta" -e "var" -e "'/>" \
    | sed 's#<br />##g;s#+++##g' >>"$output"
done

# make them uniq
sort -u -o "$output" -- "$output"

#remove temp
rm -f -- "$pages" newsticker-*.html
|
Loading…
Reference in New Issue