Commit ed729bd1 authored by remy's avatar remy
Browse files

adding some basic options to the checking script

parent dabcac1b
#!/bin/bash
########################################################################
###### Ce script verifie l'etat des sites web geres par MBB
###### internes ou pas
###### en recuperant la page d'index et le status par curl
###### Ce script verifie l'etat des sites web
###### en recuperant la page d'index et leur statut par curl
###### les pages d'index permettent de produire des checksums
###### qui sont comparees a une version historique sha256sum.txt.orig
######
###### En cas de rajout de nouveau site dans le fichier lists.txt
###### il faut donc lancer le checks.sh a la main une 1ere fois
###### et copier sha256sum.txt en sha256sum.txt.orig
###### puis le relancer dans la foulee
###### il faut donc de nouveau lancer le checks.sh avec l'option
###### firstrun
######
###### En cas de site dynamique, la comparaison des checksums par
###### rapport a la version originelle produira une erreur.
......@@ -49,79 +47,114 @@ AWK=`command -v awk`
DIFF=$(command -v diff)

# Help text. It is printed with an unquoted `echo -e $usage`, so the literal
# line breaks below are collapsed by word splitting and only the \n and \t
# escapes shape the output; keep that layout when editing the text.
usage="$0 [verbose|firstrun|check|clean|keepscan|--help]\n\n
\tverbose: display more informations while running\n
\tfirstrun: build the sha256sum.txt.orig file and a status.log file (see below)\n
\tcheck: run a basic check - get http answer and compare index checksums to the first run\n
\tclean: remove the workdir\n
\tkeepscan: keep the previous scan by renaming into html files with '.orig' extension and launch the check\n
\t--help: print this help and exit.\n
\n\n
If you run this code with the check option, it will compare the checksums to the previous one\n
and build the status.log file built from the server HTTP answers.\n
Finally, it will export those results to a checksum.json file and a status.json file.\n
\n\n
If you add a new website, use the firstrun option
\n\n
If you want to compare what has changed, use keepscan option and then, run it again with verbose option.
"
#######################################
# Strip volatile markup (tokens, nonces, timings, ...) from the downloaded
# index page of every active dynamic site, so that the page checksum does not
# change between two otherwise identical scans.
# Must be called from the directory that holds the <name>.index.html files.
# Arguments:
#   $1 - site list, one site per line with the whitespace-separated columns
#        name url type product plugin activity owner.
#        Defaults to ../lists.txt, the path the checksum export loop of this
#        script reads (NOTE(review): confirm this is the list callers expect).
# Returns:
#   1 when the site list cannot be read, 0 otherwise.
#######################################
function remove_dyn_elems {
    local list_file=${1:-../lists.txt}
    local name website type product plugin activity owner
    local page line_regexp plugin_regexp

    if [[ ! -r "${list_file}" ]]; then
        printf 'remove_dyn_elems: cannot read site list %s\n' "${list_file}" >&2
        return 1
    fi

    # `|| [[ -n ... ]]` keeps a last line that has no trailing newline.
    while read -r name website type product plugin activity owner || [[ -n "${name}" ]]; do
        # Same filter as the other list readers: comments, blanks, inactive.
        if [[ "${name}" =~ [[:space:]]*# ]] || [[ "${name}" == "" ]] || [[ "${activity}" == "inactive" ]]; then
            continue
        fi
        # Static pages are compared as downloaded.
        [[ "${type}" == "dyn" ]] || continue

        page="${name}.index.html"
        # A failed download leaves no page; nothing to clean then.
        [[ -f "${page}" ]] || continue

        # Extended regexps (sed -r); every matching line is deleted.
        # Alternatives are written a|b: wrapping them in [...] would turn
        # them into a bracket expression matching single characters.
        line_regexp=""
        plugin_regexp=""
        case "${product}" in
            wp | wordp | wpress | wordpress)
                line_regexp='search-form|cropped'
                case "${plugin}" in
                    wedevs-project-manager)
                        plugin_regexp='pusher.+ver='
                        ;;
                    gravityform)
                        plugin_regexp='ajax.php'
                        ;;
                    rsswidget)
                        plugin_regexp='rsswidget'
                        ;;
                    calendar)
                        # Only the "today" marker is removed, the calendar
                        # markup itself stays; the page will still change
                        # from one month to the next.
                        sed -i -e 's/ id="today"//' -- "${page}"
                        ;;
                    *)
                        ;;
                esac
                ;;
            typesetter)
                line_regexp='wgBackendResponseTime|wgRequestId|time|[0-9]{1,3}\.[0-9]+%[[:space:]]+[0-9]{1}\.[0-9]{3}'
                ;;
            gitlab)
                line_regexp='authenticity_token|csrf-token'
                ;;
            gitbook)
                line_regexp='data-revision'
                ;;
            nextcloud)
                line_regexp='token|nonce'
                ;;
            *)
                ;;
        esac

        if [[ -n "${line_regexp}" ]]; then
            sed -r -i -e "/${line_regexp}/d" -- "${page}"
        fi
        if [[ -n "${plugin_regexp}" ]]; then
            sed -r -i -e "/${plugin_regexp}/d" -- "${page}"
        fi
    done < "${list_file}"

    return 0
}
# Command-line dispatch: the first argument selects the run mode.
#   verbose    set VERBOSE and go on with the check
#   firstrun   wipe the work directory, set FIRST and go on
#   -h|--help  print the usage text and exit
#   clean      remove the work directory and exit
#   keepscan   rename the previous *.html pages to *.html.orig, then go on
#   check      go on with the check
#   otherwise  (including no argument) print the usage text and exit
case "$1" in
    verbose)
        VERBOSE=true
        ;;
    firstrun)
        # :? aborts instead of expanding to nothing when WORKDIR is unset.
        rm -rf -- "${WORKDIR:?WORKDIR is not set}"
        FIRST=true
        ;;
    -h | --help)
        # Left unquoted on purpose: the layout of the usage text relies on
        # word splitting to drop its literal line breaks.
        # shellcheck disable=SC2086
        echo -e $usage
        # Help must not fall through into a check run.
        exit 0
        ;;
    clean)
        rm -rf -- "${WORKDIR:?WORKDIR is not set}"
        exit 0
        ;;
    keepscan)
        # Without a previous scan there is nothing to keep.
        if [[ -d "${WORKDIR}" ]]; then
            # Subshell: the caller's working directory is left untouched, so
            # the later steps still resolve a relative WORKDIR correctly.
            (
                cd -- "${WORKDIR}" || exit 1
                for file in *.html; do
                    # An unmatched glob stays literal; skip it.
                    [[ -e "${file}" ]] || continue
                    mv -- "${file}" "${file}.orig"
                done
            ) || exit 1
        fi
        ;;
    check)
        ;;
    *)
        # shellcheck disable=SC2086
        echo -e $usage
        exit 0
        ;;
esac
# Make sure the work directory exists and continue from inside it. Stopping
# when it cannot be created or entered keeps the cleanup below from deleting
# *.html files in whatever directory the script was started from.
mkdir -p -- "${WORKDIR:?WORKDIR is not set}" || exit 1
cd -- "${WORKDIR}" || exit 1
# Drop the results of the previous run; -f because they may not exist yet.
rm -f -- status.log *.html sha256sum.txt
while read name website type product activity owner
while read name website type product plugin activity owner
do
if [[ "$name" =~ "^#*" ]] || [[ "$name" == "" ]] || [[ "$activity" == "inactive" ]]; then
if [[ "$name" =~ [[:space:]]*# ]] || [[ "$name" == "" ]] || [[ "$activity" == "inactive" ]]; then
continue
else
echo -n $website" " >> status.log && $CURL $CURL_OPTS $website|head -1 >> status.log
......@@ -142,7 +175,21 @@ if [ -z ${FIRST} ]; then
# sha256sum -c sha256sum.txt.orig
if [ ${VERBOSE} ]; then
# Show which checksums differ from the reference list and keep a copy of that
# report in modified_websites.txt.
"${DIFF}" sha256sum.txt sha256sum.txt.orig | tee modified_websites.txt
# Collect the third field of every report line mentioning index.html
# (presumably the page file name in diff's "< hash  file" lines - confirm
# against the sha256sum.txt format). A changed page is reported once per side
# and the two lines are not always adjacent, so duplicates are dropped with a
# seen-table instead of uniq; the report order is kept.
websites_modified=$(awk '/index.html/ && !seen[$3]++ {print $3}' modified_websites.txt)
# Word splitting is intended here: one page file name per word.
for website_modified in ${websites_modified}; do
    orig_file="${website_modified}.orig"
    echo ""
    echo "#################################################################"
    echo "Checking if there is any .orig file to compare with new result..."
    echo "#################################################################"
    echo ""
    # Only pages that still have a kept previous copy can be compared.
    if [[ -f "${website_modified}" && -f "${orig_file}" ]]; then
        "${DIFF}" "${orig_file}" "${website_modified}"
    fi
done
#rm modified_websites.txt
else
$DIFF -q sha256sum.txt sha256sum.txt.orig 1>/dev/null 2>&1
if [ $? -eq 1 ]; then
......@@ -153,9 +200,13 @@ if [ -z ${FIRST} ]; then
# Build checksums.json: an opening brace, one sorted entry per active site,
# a closing brace. The entries are produced by ../check_sha256.awk, which is
# given the site url, the site name and the total number of lines found in
# the sha256sum.txt* files.
total=$(cat sha256sum.txt* | wc -l)
rm -f checksums.json
echo "{" > checksums.json
while read -r name url type product plugin activity owner; do
    # Comment lines, blank lines and inactive sites produce no entry.
    if [[ "${name}" =~ [[:space:]]*# ]] || [[ "${name}" == "" ]] || [[ "${activity}" == "inactive" ]]; then
        continue
    fi
    "${AWK}" -f ../check_sha256.awk -v url="${url}" -v sitename="${name}" -v total="${total}" sha256sum.txt*
done < ../lists.txt | sort >> checksums.json
echo "}" >> checksums.json
else
......
#name url type product activity owner
#name url type product plugin activity owner
## name == website's name (required)
## url == url to the index page (required)
## type == [static] or [dyn]amic content (required)
......@@ -6,10 +6,10 @@
## activity == [active|inactive] (required)
## owner == who/which company owns this website (optional)
hesk https://kimura.univ-montp2.fr/aide/ static hesk active mbb
rstudio https://rstudio.mbb.univ-montp2.fr/auth-sign-in static rstudio active mbb
wicopa https://web.mbb.univ-montp2.fr/wicopa/ static php active mbb
rpacib https://shiny.mbb.univ-montp2.fr/RPACIB/ static shiny active mbb
bpp http://biopp.univ-montp2.fr/wiki/index.php/Main_Page dyn typesetter active julien
gitlab https://gitlab.mbb.univ-montp2.fr/users/sign_in dyn gitlab active mbb
doc-user-mbb https://gitlab.mbb.univ-montp2.fr/docs/doc_user/docs/_book/index.html static gitbook active mbb
hesk https://kimura.univ-montp2.fr/aide/ static hesk - active mbb
rstudio https://rstudio.mbb.univ-montp2.fr/auth-sign-in static rstudio - active mbb
wicopa https://web.mbb.univ-montp2.fr/wicopa/ static php - active mbb
rpacib https://shiny.mbb.univ-montp2.fr/RPACIB/ static shiny - active mbb
bpp http://biopp.univ-montp2.fr/wiki/index.php/Main_Page dyn typesetter - active julien
gitlab https://gitlab.mbb.univ-montp2.fr/users/sign_in dyn gitlab - active mbb
doc-user-mbb https://gitlab.mbb.univ-montp2.fr/docs/doc_user/docs/_book/index.html static gitbook - active mbb
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment