Commit ad8048bd authored by remy's avatar remy
Browse files

Seems to work with arrays for sed substitution

parent ed729bd1
#!/bin/bash
########################################################################
###### Ce script verifie l'etat des sites web
###### Ce script verifie l'etat des sites web listés dans lists.txt
###### en recuperant la page d'index et leur statut par curl
###### les pages d'index permettent de produire des checksums
###### qui sont comparees a une version historique sha256sum.txt.orig
######
######
######
###### En cas de rajout de nouveau site dans le fichier lists.txt
###### il faut donc de nouveau lancer le checks.sh avec l'option
###### firstrun
###### "init"
######
###### Un lancement normal se fait avec l'option "check" et produit
###### des fichiers log et json dans un sous-dossier workdir où
###### se fait tout le travail.
######
###### En cas de site dynamique, la comparaison des checksums par
###### rapport a la version originelle produira une erreur.
###### Il faudra donc copier le fichier d'index html
###### ex: cd workdir && rename.ul .html .html.orig *.html
###### Relancer de nouveau ce script
###### puis faire des diff entre les 2 fichiers html
###### pour identifier les elements changeant dans la page.
###### Il faut ensuite supprimer de la page tous les
###### champs ou morceaux du DOM susceptibles de changer
###### (voir la liste de sed plus bas (souvent des token...))
###### il faut ensuite regenerer les checksums et en faire une copie
###### dans sha256sum.txt.orig
###### Il faudra alors lancer le script avec l'option "compare"
###### qui va faire des diff entre les fichiers html
###### pour permettre d'identifier les elements changeant dans la page.
######
###### Si la solution utilisée produit une page dynamique et qu'elle
###### n'est pas connue du script il faudra la rajouter. Pour ça, voir
###### la fonction remove_dyn_elems
########################################################################
# En cas d'update des fichiers wordpress avec InfiniteWP, il faudra
......@@ -47,46 +50,47 @@ AWK=`command -v awk`
DIFF=`command -v diff`
usage="$0 [verbose|firstrun||check|clean|keepscan|--help]\n\n
\tverbose: display more informations while running\n
\tfirstrun: build the sha256sum.txt.orig file and a status.log file (see bellow)\n
usage="$0 [compare|init|check|clean|--help]\n\n
\tcompare: display more informations on differences\n
\tinit: build the sha256sum.txt.orig file and a status.log file (see bellow)\n
\tcheck: run a basic check - get http answer and compare index checksums to the first run\n
\tclean: remove the workdir\n
\tkeepscan: keep the previous scan by renaming into html files with '.orig' extension and launch the check\n
\t--help: print this help and exit.\n
\n\n
If you run this code with the check option, it will compare the checksums to the previous one\n
and build the status.log file built from the server HTTP answers.\n
If you run this code with the check option, it will compare the checksums to the previous one and build the status.log file built from the server HTTP answers.\n
Finally, it will export those results to a checksum.json file and a status.json file.\n
\n\n
If you add a new website, use the firstrun option
If you add a new website to lists.txt, use the init option
\n\n
If you want to compare what have changed, use keepscan option and then, run it again with verbose option.
If you want to show differences about what have changed, use the compare option.
"
function remove_dyn_elems {
# listing of dynamic elements, depending on product solution
wp_regexp='[search-form|cropped]'
wp_regexp_arr=("search-form" "cropped")
wp_regexp_plugin_wedevsprojectmanager='pusher.+ver='
wp_regexp_plugin_gravityform='ajax.php'
wp_regexp_plugin_calendar=' id=\"today\"'
wp_regexp_plugin_rsswidget='rsswidget'
gitlab_regexp='[authenticity_token|csrf-token]'
typesetter_regexp='[wgBackendResponseTime|wgRequestId|time|[0-9]{1,3}\.[0-9]{1,2}+%\s+[0-9]{1}\.[0-9]{3}]'
nextcloud_regexp='[token|nonce]'
gitlab_regexp_arr=("authenticity_token" "csrf-token")
typesetter_regexp_arr=("wgBackendResponseTime" "wgRequestId" "time" "[0-9]{1,3}\.[0-9]{1,2}+%\s+[0-9]{1}\.[0-9]{3}")
nextcloud_regexp_arr=("token" "nonce")
gitbook_regexp='data-revision'
while read name website type product plugin activity owner
do
if [[ "$name" =~ [[:space:]]*# ]] || [[ "$name" == "" ]] || [[ "$activity" == "inactive" ]]; then
if [[ "${name}" =~ [[:space:]]*# ]] || [[ "${name}" == "" ]] || [[ "${activity}" == "inactive" ]]; then
continue
else
if [[ "$type" == "dyn" ]]; then
case $product in
"w(ord)?p(ress)?" )
sed -ri -e /$wp_regexp/d $name.index.html
if [[ "${type}" == "dyn" ]]; then
case ${product} in
wp|wordpress )
for wp_regexp in ${wp_regexp_arr[@]}
do
sed -ri -e /$wp_regexp/d $name.index.html
done
case $plugin in
"wedevs-project-manager" )
sed -ri -e /$wp_regexp_plugin_wedevsprojectmanager/d $name.index.html
......@@ -105,16 +109,25 @@ function remove_dyn_elems {
esac
;;
"typesetter" )
sed -ri -e /$typesetter_regexp/d $name.index.html
for typesetter_regexp in ${typesetter_regexp_arr[@]}
do
sed -ri -e /$typesetter_regexp/d $name.index.html
done
;;
"gitlab" )
sed -ri -e /$gitlab_regexp/d $name.index.html
for gitlab_regexp in ${gitlab_regexp_arr[@]}
do
sed -ri -e /$gitlab_regexp/d $name.index.html
done
;;
"gitbook" )
sed -ri -e /$gitbook_regexp/d $name.index.html
;;
"nextcloud" )
sed -ri -e /$nextcloud_regexp/d $name.index.html
for nextcloud_regexp in ${nextcloud_regexp_arr[@]}
do
sed -ri -e /$nextcloud_regexp/d $name.index.html
done
;;
* )
;;
......@@ -124,19 +137,18 @@ function remove_dyn_elems {
done < $1
}
if [[ "$1" == "verbose" ]]; then
VERBOSE=true
elif [[ "$1" == "firstrun" ]]; then
if [[ "$1" == "compare" ]]; then
cd ${WORKDIR}
for file in `ls *.html`; do
mv $file $file.orig
done
COMPARE=true
elif [[ "$1" == "init" ]]; then
rm -rf ${WORKDIR}
FIRST=true
elif [[ "$1" == "clean" ]];then
rm -rf ${WORKDIR}
exit 0
elif [[ "$1" == "keepscan" ]];then
cd ${WORKDIR}
for file in `ls *.html`; do
mv $file $file.orig
done
else
if [[ "$1" != "check" ]]; then
echo -e $usage
......@@ -174,7 +186,7 @@ if [ -z ${FIRST} ]; then
# or the previous one
# sha256sum -c sha256sum.txt.orig
if [ ${VERBOSE} ]; then
if [ ${COMPARE} ]; then
${DIFF} sha256sum.txt sha256sum.txt.orig |tee modified_websites.txt
websites_modified=`awk '/index.html/ {print $3}' modified_websites.txt|uniq`
for website_modified in $websites_modified
......@@ -184,8 +196,10 @@ if [ -z ${FIRST} ]; then
echo "#################################################################"
echo "Checking if there is any .orig file to compare with new result..."
echo "#################################################################"
echo ""
echo ""
if [ -f $website_modified ] && [ -f $orig_file ]; then
echo $DIFF $orig_file $website_modified
echo ""
$DIFF $orig_file $website_modified
fi
done
......@@ -193,7 +207,7 @@ if [ -z ${FIRST} ]; then
else
$DIFF -q sha256sum.txt sha256sum.txt.orig 1>/dev/null 2>&1
if [ $? -eq 1 ]; then
$DIFF sha256sum.txt sha256sum.txt.orig|tail -1|$AWK '{print $3}'
$DIFF sha256sum.txt sha256sum.txt.orig|$AWK '/[az]/ {print $3}' |uniq
fi
fi
......
......@@ -3,6 +3,7 @@
## url == url to the index page (required)
## type == [static] or [dyn]amic content (required)
## product == technology used by the website (i.e., if this is a CMS, just use the CMS name; if it is home-made, just enter the language used) (required)
## plugin == specify an optional plugin name for a CMS (check the script to see how to add an exception for dynamic content)
## activity == [active|inactive] (required)
## owner == who/which company owns this website (optional)
......@@ -13,3 +14,5 @@ rpacib https://shiny.mbb.univ-montp2.fr/RPACIB/ static shiny - active mbb
bpp http://biopp.univ-montp2.fr/wiki/index.php/Main_Page dyn typesetter - active julien
gitlab https://gitlab.mbb.univ-montp2.fr/users/sign_in dyn gitlab - active mbb
doc-user-mbb https://gitlab.mbb.univ-montp2.fr/docs/doc_user/docs/_book/index.html static gitbook - active mbb
intranet http://intranet.mbb.univ-montp2.fr/ dyn wp - active isem
nextcloud-mbb https://smallservices.mbb.univ-montp2.fr/owncloud/index.php/login dyn nextcloud - active mbb
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment