diff --git a/web_site_monitor b/web_site_monitor new file mode 100644 index 0000000..93ba95f --- /dev/null +++ b/web_site_monitor @@ -0,0 +1,482 @@ +#!/bin/bash +#=============================================================================== +# DIRECTORY: +# --- +# +# FILE: +# ./web_site_monitor +# +# BASIC USAGE: +# $ ./web_site_monitor [options] <url> +# OR +# $ bash web_site_monitor [options] <url> +# $ ksh web_site_monitor [options] <url> +# $ zsh web_site_monitor [options] <url> +# $ dash web_site_monitor [options] <url> +# $ busybox ash web_site_monitor [options] <url> +# +# OPTIONS: +# -d : enable debugging +# -h : display usage and exit +# -n : disable logging (enabled by default) +# -q : disable stdout (enabled by default) +# -v : print version and exit +# -E <regexp> : grep with extended regexp +# -F <string> : grep with fixed string +# -G <regexp> : grep with basic regexp +# +# EXIT STATES: +# 0 = success +# 1 = (t)csh is not supported +# 2 = a library is missing +# 3 = too many grep modes given +# 4 = invalid option given +# 5 = a given option requires an argument +# 6 = url is missing +# 7 = url doesn't begin with a wget compatible scheme +# +# (for exit codes greater than 100 look into the included libraries) +# +# DESCRIPTION: +# Recursively downloads a website, checks for differences against an older +# version of the website if present and searches for text with regular +# expressions if wanted. +# +# REQUIREMENTS: +# wget, date, find, tail, bc, mkdir, ln, ... +# +# BUGS: +# --- +# +# NOTES: +# Tested on: +# - ArchLinux + bash, zsh, ksh, busybox ash & dash +# - FreeBSD 11 + bash, zsh, ksh, dash & busybox ash +# - OS X (10.11.6) + bash (sh), zsh and ksh +# ! (t)csh is NOT supported ! +# +# AUTHOR: +# Patrick Neumann, patrick@neumannsland.de +# +# COMPANY: +# (privately) +# +# VERSION: +# 1.0 +# +# LINK TO THE MOST CURRENT VERSION: +# (Sorry, I bet, I'm not allowed to publish it over GitHub!) 
#
# CREATED:
#   2016-10-18
#
# COPYRIGHT (C):
#   2016 - Patrick Neumann
#
# LICENSE:
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
# WARRANTY:
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# NOTE:
#   ---
#
# TODO:
#   More testing:
#   - Debian GNU/Linux 8.x + bash, zsh, ksh, dash & busybox ash
#   - Linux Mint 18 (Cinnamon) + bash, zsh, ksh, dash & busybox ash
#   - Windows 10 & Ubuntu bash
#
# HISTORY:
#   1.0 - Patrick Neumann - Initial (for the trainers eyes only) release
#
#===============================================================================

#-------------------------------------------------------------------------------
# (t)csh needs too many modifications!
# (different "if" syntax, no conditional command, "set" before var="val",...)
# Worth reading link: http://www.grymoire.com/unix/CshTop10.txt
#-------------------------------------------------------------------------------
test -n "${shell}" \
  && printf "\n\033[01;31;40mERROR: (t)csh is not supported... EXIT\!\!\!\033[00m\n\n" \
  && exit 1

#-------------------------------------------------------------------------------
# Checking for and including libraries.
#-------------------------------------------------------------------------------
# "${0%/*}" only yields a directory if $0 contains a slash. Started as
# "bash web_site_monitor" it would expand to the script name itself and every
# library would be reported as missing, so fall back to the current directory.
case "${0}" in
  */* ) WSM_DIR="${0%/*}" ;;
  *   ) WSM_DIR="." ;;
esac
readonly WSM_DIR

# dash doesn't support arrays!
LIBRARIES="casualscripter_functions.sh"
readonly LIBRARIES="${LIBRARIES} forensicFunctions.sh"
for library in ${LIBRARIES} ; do
  if ! [ -f "${WSM_DIR}/${library}" ] ; then
    # The library name is passed as an argument, never as part of the format.
    printf "\n\033[01;31;40mERROR: library \"%s\" missing... EXIT!!!\033[00m\n\n" "${library}"
    exit 2
  fi
  # "." is the POSIX spelling; dash has no "source" builtin.
  . "${WSM_DIR}/${library}"
done

#=== CONFIGURATION (static) ====================================================
# Version:
readonly VERSION="1.0"
readonly CREATED="2016-10-18"

# BIN_WHICH already set in library!
# BIN_UNAME already set in library!
# BIN_BASENAME already set in library!
# BIN_PS already set in library!
# BIN_GREP already set in library!
# BIN_AWK already set in library!
# BIN_SED already set in library!
# BIN_DIRNAME already set in library!
# BIN_TEE already set in library!
# BIN_OPENSSL already set in library!
# BIN_PRINTF already set in library!
assign_binary "wget"
assign_binary "date"
assign_binary "find"
assign_binary "tail"
assign_binary "bc"
assign_binary "mkdir"
assign_binary "ln"
assign_binary "rmdir"
assign_binary "rm"

# Changeable defaults:
# Default of ECHO_FUNC in library is "display".
ECHO_FUNC="display_and_log"
GET_HELP="no"
ENABLE_DEBUGGING="no"
DISABLE_LOGGING="no"
DISABLE_STDOUT="no"
GET_VERSION_ONLY="no"
# The grep in macOS does not support Perl-compatible regular expressions.
# (That is why this script doesn't support it.)
GREP_MODE=""

# Switching the digest while monitoring a website will break the detection
# of modified content the first time after switching.
# (That is why it's not part of the command line options.)
readonly DGST="md5"
readonly DIR_NEW="new_files"
readonly DIR_MV="renamed_files"
readonly DIR_MOD="modified_files"
readonly DIR_RM="deleted_files"
readonly DIR_GREP="search_and_found"

# Use UTC to prevent problems if the investigator travels into another timezone.
# The newest "./YYYYMMDDTHHMMSSUTC" directory is the last run; it has to be
# looked up BEFORE the directory of the current run is created below.
readonly LAST_RUN="$( ${BIN_FIND} ${DARWIN_FIND_REGEXP_TYPE} . ${LINUX_FIND_REGEXP_TYPE} \
                        -regex '\./[[:digit:]]{8}T[[:digit:]]{6}UTC' -type d \
                      | sort -d \
                      | ${BIN_TAIL} -n 1 )"
readonly UTC="$( ${BIN_DATE} -u "+%s" )"
#readonly UTC="171320100" # 06.06.1975
#readonly UTC="1271231700" # 14.04.2010
readonly DIRECTORY="./$( ${BIN_DATE} -u ${DATE_DISPLAY}${UTC} "+%Y%m%dT%H%M%SUTC" )"
readonly LOCAL_DATE="$( ${BIN_DATE} -u ${DATE_DISPLAY}${UTC} )"

# Create target directory if necessary
if ! [ -d "${DIRECTORY}" ] ; then
  ${BIN_MKDIR} "${DIRECTORY}"
fi

readonly LAST_DGST_FILE="${LAST_RUN}/openssl_dgst-${DGST}.txt"
readonly LOG_FILE="${DIRECTORY}/wsm.log"
WGET_OUTPUT="--no-verbose"
WGET_LOGFILE="${LOG_FILE}"
readonly DGST_FILE="${DIRECTORY}/openssl_dgst-${DGST}.txt"

#=== FUNCTION ==================================================================
#          NAME: usage
#   DESCRIPTION: Display help (invocation forms and all options) on stdout.
#                The mandatory <url> and the pattern argument of -E/-F/-G are
#                part of the help text.
#   PARAMETER 1: -
#===============================================================================
usage() {
  ${BIN_PRINTF} "BASIC USAGE...\n"
  ${BIN_PRINTF} " $ ./web_site_monitor [options] <url>\n"
  ${BIN_PRINTF} " OR\n"
  ${BIN_PRINTF} " $ bash web_site_monitor [options] <url>\n"
  ${BIN_PRINTF} " $ ksh web_site_monitor [options] <url>\n"
  ${BIN_PRINTF} " $ zsh web_site_monitor [options] <url>\n"
  ${BIN_PRINTF} " $ dash web_site_monitor [options] <url>\n"
  ${BIN_PRINTF} " $ busybox ash web_site_monitor [options] <url>\n\n"
  ${BIN_PRINTF} "OPTIONS:\n"
  ${BIN_PRINTF} " -d : enable debugging\n"
  ${BIN_PRINTF} " -h : display usage and exit\n"
  ${BIN_PRINTF} " -n : disable logging (enabled by default)\n"
  ${BIN_PRINTF} " -q : disable stdout (enabled by default)\n"
  ${BIN_PRINTF} " -v : print version and exit\n"
  ${BIN_PRINTF} " -E <regexp> : text search with extended regexp\n"
  ${BIN_PRINTF} " -F <string> : text search with fixed string\n"
  ${BIN_PRINTF} " -G <regexp> : text search with basic regexp\n\n"
}

#=== FUNCTION ==================================================================
#          NAME: anti_multi_grep
#   DESCRIPTION: Only one text search of "-E", "-F" or "-G"
#                incl. regexp makes sense.
#   PARAMETER 1: -
#===============================================================================
# Aborts with exit state 3 (after removing the still empty run directory and
# printing the usage) if a grep mode was already selected by an earlier option.
anti_multi_grep () {
  if [ -n "${GREP_MODE}" ] ; then
    ${BIN_PRINTF} "\033[01;31;40mERROR: to many grep modes given... EXIT!!!\033[00m\n"
    ${BIN_RMDIR} "${DIRECTORY}"
    usage
    exit 3
  fi
}

#=== CONFIGURATION (user 1/2) ==================================================
# (-) GNU- and BSD-getopt behave differently
# (+) getopts is more POSIX and system-/shell-portable
#
# The settings are assigned as plain variables inside the loop and frozen
# afterwards: a "readonly VAR=..." inside the loop fails as soon as a flag is
# given twice (e.g. "-d -d"), which is even fatal in dash.
# Start with an empty pattern so a REGEXP inherited from the environment can
# not trigger a text search.
REGEXP=""
while getopts ":dhnqvE:F:G:" opt ; do
  case "${opt}" in
    d  ) ENABLE_DEBUGGING="yes" ;;
    h  ) GET_HELP="yes" ;;
    n  ) DISABLE_LOGGING="yes" ;;
    q  ) DISABLE_STDOUT="yes" ;;
    v  ) GET_VERSION_ONLY="yes" ;;
    E  ) anti_multi_grep
         GREP_MODE="--extended-regexp"
         REGEXP="${OPTARG}"
         ;;
    F  ) anti_multi_grep
         GREP_MODE="--fixed-strings"
         REGEXP="${OPTARG}"
         ;;
    G  ) anti_multi_grep
         GREP_MODE="--basic-regexp"
         REGEXP="${OPTARG}"
         ;;
    \? ) ${BIN_PRINTF} "\033[01;31;40mERROR: invalid option: -%s... EXIT!!!\033[00m\n" "${OPTARG}"
         # Remove the (still empty) run directory, otherwise the next run
         # would mistake it for the last successful run.
         ${BIN_RMDIR} "${DIRECTORY}"
         usage
         exit 4
         ;;
    :  ) ${BIN_PRINTF} "\033[01;31;40mERROR: option -%s requires an argument... EXIT!!!\033[00m\n" "${OPTARG}"
         ${BIN_RMDIR} "${DIRECTORY}"
         usage
         exit 5
         ;;
  esac
done
readonly ENABLE_DEBUGGING GET_HELP DISABLE_LOGGING DISABLE_STDOUT GET_VERSION_ONLY
readonly GREP_MODE REGEXP
# NOTE(review): the LC_ALL prefix is kept from the original; its purpose for
# "shift" is not visible here - confirm before removing.
LC_ALL="C" shift "$(( OPTIND - 1 ))"

#=== CONFIGURATION (output) ====================================================
if [ "${DISABLE_LOGGING}" = "yes" ] ; then
  ECHO_FUNC="display"
  WGET_LOGFILE="/dev/null"
fi
if [ "${DISABLE_STDOUT}" = "yes" ] ; then
  ECHO_FUNC="log"
  WGET_OUTPUT="--append-output=${LOG_FILE}"
fi
# Both disabled: this combination overrides the two single settings above.
if [ "${DISABLE_LOGGING}" = "yes" ] && [ "${DISABLE_STDOUT}" = "yes" ] ; then
  ECHO_FUNC="quiet"
  WGET_OUTPUT="--quiet"
  WGET_LOGFILE="/dev/null"
fi

# clear logfile if necessary
if [ "${ECHO_FUNC}" = "display_and_log" ] || [ "${ECHO_FUNC}" = "log" ] ; then
  overwrite "${LOG_FILE}" logfile
fi

#-------------------------------------------------------------------------------
# Just display version, if "-h" or "-v" is given and exit without error code
# if "-v" is given.
#-------------------------------------------------------------------------------
if [ "${GET_HELP}" = "yes" ] || [ "${GET_VERSION_ONLY}" = "yes" ] ; then
  ${BIN_PRINTF} "\nVersion: ${VERSION} (created: ${CREATED})\n\n"
else
  ${ECHO_FUNC} "\nVersion: ${VERSION} (created: ${CREATED})\n\n"
fi
if [ "${GET_VERSION_ONLY}" = "yes" ] ; then
  ${BIN_RMDIR} "${DIRECTORY}"
  exit 0
fi

#-------------------------------------------------------------------------------
# Just display help, if "-h" is given and exit without error code.
#-------------------------------------------------------------------------------
# only makes sense for stdout
if [ "${GET_HELP}" = "yes" ] ; then
  ${BIN_RMDIR} "${DIRECTORY}"
  usage
  exit 0
fi

#=== CONFIGURATION (user 2/2) ==================================================
# The URL is always handed to printf as an argument of '%s': used as the format
# string, a percent-encoded URL (e.g. ".../a%20b") would be mangled.
if [ -z "${1}" ] ; then
  ${BIN_PRINTF} "\033[01;31;40mERROR: url is missing... EXIT!!!\033[00m\n\n"
  ${BIN_RM} -rf "${DIRECTORY:?}"
  usage
  exit 6
elif ! ${BIN_PRINTF} '%s' "${1}" \
       | ${BIN_GREP} --extended-regexp '^(http|ftp)s?://' > /dev/null 2>&1 ; then
  # The pattern is anchored: the scheme has to be at the BEGINNING of the url.
  ${BIN_PRINTF} "\033[01;31;40mERROR: url doesn't begin with a wget compatible scheme... EXIT!!!\033[00m\n\n"
  ${BIN_RM} -rf "${DIRECTORY:?}"
  usage
  exit 7
fi
# wget needs one trailing slash if accessing a subdir on a server
readonly URL="$( ${BIN_PRINTF} '%s' "${1}" \
                 | ${BIN_SED} ${SED_EXT_REGEXP} 's#/*$#/#' )"
# URL without scheme and without the trailing slash
readonly SUBDIR="$( ${BIN_PRINTF} '%s' "${URL}" \
                    | ${BIN_SED} ${SED_EXT_REGEXP} 's#^(http|ftp)s?://##' \
                    | ${BIN_SED} ${SED_EXT_REGEXP} 's#/$##' )"

#-------------------------------------------------------------------------------
# Output for some debugging (development).
#-------------------------------------------------------------------------------
if [ "${ENABLE_DEBUGGING}" = "yes" ] ; then
  ${ECHO_FUNC} "Debugging...\n\n"
  ${ECHO_FUNC} "UTC timestamp: ${UTC}\n"
  ${ECHO_FUNC} "Directory name: ${DIRECTORY}\n"
  ${ECHO_FUNC} "Logfile name: ${LOG_FILE}\n"
  ${ECHO_FUNC} "Hashfile name: ${DGST_FILE}\n"
  ${ECHO_FUNC} "Get help: ${GET_HELP}\n"
  ${ECHO_FUNC} "Enable debugging: ${ENABLE_DEBUGGING}\n"
  ${ECHO_FUNC} "Disable logging: ${DISABLE_LOGGING}\n"
  ${ECHO_FUNC} "Disable stdout: ${DISABLE_STDOUT}\n"
  ${ECHO_FUNC} "Grep mode: ${GREP_MODE}\n"
  ${ECHO_FUNC} "Regexp: ${REGEXP}\n"
  ${ECHO_FUNC} "URL (last param): ${URL}\n"
  ${ECHO_FUNC} "wget subdir (URL w/o scheme): ${SUBDIR}\n"
  ${ECHO_FUNC} "Echo function: ${ECHO_FUNC}\n"
  ${ECHO_FUNC} "Wget output: ${WGET_OUTPUT}\n"
  ${ECHO_FUNC} "Wget logfile: ${WGET_LOGFILE}\n\n"
fi

#-------------------------------------------------------------------------------
# Output/log first information.
#-------------------------------------------------------------------------------
if [ -z "${LAST_RUN}" ] ; then
  ${ECHO_FUNC} "This is the first run.\n\n"
else
  ${ECHO_FUNC} "The directory of the last run is: ${LAST_RUN}\n\n"
fi

${ECHO_FUNC} "This run was starting at: ${LOCAL_DATE}\n\n"

#-------------------------------------------------------------------------------
# Recursive download with wget.
#-------------------------------------------------------------------------------
${ECHO_FUNC} "Mirroring website...\n\n"

# macOS and FreeBSD "tee" only support "-a" and not "--append"
# (WGET_OUTPUT is a single wget option and therefore used unquoted.)
${BIN_WGET} --recursive \
            --level=10 \
            --tries=2 \
            --timeout=10 \
            --execute robots=off \
            --directory-prefix="${DIRECTORY}" \
            --user-agent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1' \
            ${WGET_OUTPUT} \
            "${URL}" 2>&1 | ${BIN_TEE} -a "${WGET_LOGFILE}"

${ECHO_FUNC} "\nFiles are stored in ${DIRECTORY}/${SUBDIR}\n\n"

#-------------------------------------------------------------------------------
# Hashing with openssl. (md5sum is not supported by macOS.)
#-------------------------------------------------------------------------------
${ECHO_FUNC} "Calculationg hash values...\n\n"

# Every hashfile line has the form "<DGST>(./<path>)= <hash>"; the name of the
# run directory is replaced by "." so that hashfiles of different runs are
# comparable line by line.
${BIN_FIND} "${DIRECTORY}/${SUBDIR}" -type f -exec ${BIN_OPENSSL} dgst -"${DGST}" '{}' \; \
  | ${BIN_SED} "s#${DIRECTORY}#.#" > "${DGST_FILE}"

${ECHO_FUNC} "The hashfile ${DGST_FILE} was successfully generated\n\n"

#=== FUNCTION ==================================================================
#          NAME: link_finding
#   DESCRIPTION: Create a symbolic link below a category directory of the
#                current run (parent directories are created on demand).
#   PARAMETER 1: absolute path the link points to
#   PARAMETER 2: name of the category directory (e.g. ${DIR_MOD})
#   PARAMETER 3: path of the link relative to the category directory
#===============================================================================
link_finding() {
  link_parent="${DIRECTORY}/${2}/$( ${BIN_DIRNAME} "${3}" )"
  if ! [ -d "${link_parent}" ] ; then
    # macOS and FreeBSD only support "-p" and not "--parents"
    ${BIN_MKDIR} -p "${link_parent}"
  fi
  # macOS and FreeBSD only support "-s" and not "--symbolic"
  ${BIN_LN} -s "${1}" "${DIRECTORY}/${2}/${3}"
}

#=== FUNCTION ==================================================================
#          NAME: dgst_lists_file
#   DESCRIPTION: Succeed if a hashfile has an entry for exactly this file name
#                (the enclosing "(" and ")= " prevent substring hits such as
#                "a.html" in "a.html.bak").
#   PARAMETER 1: file name as listed in the hashfile ("./...")
#   PARAMETER 2: hashfile
#===============================================================================
dgst_lists_file() {
  ${BIN_GREP} --fixed-strings "(${1})= " "${2}" > /dev/null 2>&1
}

#=== FUNCTION ==================================================================
#          NAME: dgst_lists_hash
#   DESCRIPTION: Succeed if a hashfile has an entry with this hash value.
#   PARAMETER 1: hash value
#   PARAMETER 2: hashfile
#===============================================================================
dgst_lists_hash() {
  ${BIN_GREP} --fixed-strings ")= ${1}" "${2}" > /dev/null 2>&1
}

#-------------------------------------------------------------------------------
# Verifying hashes against older download.
#-------------------------------------------------------------------------------
if [ -n "${LAST_RUN}" ] && ! [ -f "${LAST_DGST_FILE}" ] ; then
  # E.g. the last run was aborted before hashing; nothing to compare against.
  ${ECHO_FUNC} "The hashfile ${LAST_DGST_FILE} of the last run is missing, comparison skipped.\n\n"
elif [ -n "${LAST_RUN}" ] ; then
  ${ECHO_FUNC} "Searching for differnences...\n\n"

  # Pass 1: every entry of the LAST run that is not present unchanged is
  # either modified (same name), renamed/moved (same hash) or deleted.
  # "IFS= read -r" keeps backslashes and surrounding blanks of file names.
  while IFS= read -r line ; do
    file="${line#*(}"
    file="${file%)= *}"
    hash="${line#*)= }"

    if ! ${BIN_GREP} --fixed-strings --line-regexp "${line}" "${DGST_FILE}" > /dev/null 2>&1 ; then
      if dgst_lists_file "${file}" "${DGST_FILE}" ; then
        link_finding "${PWD}/${DIRECTORY#./}/${file#./}" "${DIR_MOD}" "${file#./}"
        ${ECHO_FUNC} "Symbolic link of modified/exchanged file ${file} was created in ${DIRECTORY}/${DIR_MOD}\n"
      elif dgst_lists_hash "${hash}" "${DGST_FILE}" ; then
        # The first entry with the same hash is taken as the new location; it
        # is looked up as a fixed string, so it is also found after a move
        # into another directory and regexp characters in paths do no harm.
        new_line="$( ${BIN_GREP} --fixed-strings --max-count=1 ")= ${hash}" "${DGST_FILE}" )"
        new_file="${new_line#*(}"
        new_file="${new_file%)= *}"
        link_finding "${PWD}/${DIRECTORY#./}/${new_file#./}" "${DIR_MV}" "${file#./}"
        ${ECHO_FUNC} "Symbolic link of renamed/moved file ${file} was created in ${DIRECTORY}/${DIR_MV}\n"
      else
        # Deleted files only exist in the directory of the last run.
        link_finding "${PWD}/${LAST_RUN#./}/${file#./}" "${DIR_RM}" "${file#./}"
        ${ECHO_FUNC} "Symbolic link of deleted file ${file} was created in ${DIRECTORY}/${DIR_RM}\n"
      fi
    fi
  done < "${LAST_DGST_FILE}"

  # Pass 2: entries of THIS run whose name and hash are both unknown to the
  # last run are new files.
  while IFS= read -r line ; do
    file="${line#*(}"
    file="${file%)= *}"
    hash="${line#*)= }"

    if ! dgst_lists_file "${file}" "${LAST_DGST_FILE}" \
       && ! dgst_lists_hash "${hash}" "${LAST_DGST_FILE}" ; then
      link_finding "${PWD}/${DIRECTORY#./}/${file#./}" "${DIR_NEW}" "${file#./}"
      ${ECHO_FUNC} "Symbolic link of new added file ${file} was created in ${DIRECTORY}/${DIR_NEW}\n"
    fi
  done < "${DGST_FILE}"

  ${ECHO_FUNC} "... done.\n\n"
fi

#-------------------------------------------------------------------------------
# Text search (if given on the command line).
#-------------------------------------------------------------------------------
if [ -n "${REGEXP}" ] ; then
  ${ECHO_FUNC} "Processing text search...\n\n"

  # "-e" keeps a pattern with a leading "-" from being parsed as an option.
  readonly HITS="$( ${BIN_GREP} --recursive \
                                --files-with-matches \
                                ${GREP_MODE} -e "${REGEXP}" \
                                "${DIRECTORY}/${SUBDIR}" )"

  if [ -n "${HITS}" ] ; then
    # One hit per line. A here-document is used instead of IFS=$'\n', which
    # is not understood by dash and busybox ash.
    while IFS= read -r hit ; do
      # "${hit#./*/}" strips the leading "./<run directory>/".
      link_finding "${PWD}/${hit#./}" "${DIR_GREP}" "${hit#./*/}"
      ${ECHO_FUNC} "Symbolic link of file ${hit} with search hit was created in ${DIRECTORY}/${DIR_GREP}\n"
    done <<EOF
${HITS}
EOF
  fi

  ${ECHO_FUNC} "\n... done.\n\n"
fi

${ECHO_FUNC} "The script has successfully finished.\n\n"

exit 0