added the main script

This commit is contained in:
Patrick Neumann 2018-06-15 12:01:30 +02:00
parent 909edfcec9
commit 91ec537e7f

482
web_site_monitor Normal file
View File

@ -0,0 +1,482 @@
#!/bin/bash
#===============================================================================
# DIRECTORY:
# ---
#
# FILE:
# ./web_site_monitor
#
# BASIC USAGE:
# $ ./web_site_monitor <URL>
# OR
# $ bash web_site_monitor <URL>
# $ ksh web_site_monitor <URL>
# $ zsh web_site_monitor <URL>
# $ dash web_site_monitor <URL>
# $ busybox ash web_site_monitor <URL>
#
# OPTIONS:
# -d : enable debugging
# -h : display usage and exit
# -n : disable logging (enabled by default)
# -q : disable stdout (enabled by default)
# -v : print version and exit
# -E <regexp> : grep with extended regexp
# -F <regexp> : grep with fixed string
# -G <regexp> : grep with basic regexp
#
# EXIT STATES:
# 0 = success
# 1 = (t)csh is not supported
# 2 = a library is missing
# 3 = too many grep modes given
# 4 = invalid option given
# 5 = a given option requires an argument
# 6 = url is missing
# 7 = url doesn't begin with a wget compatible scheme
#
# (for exit codes greater than 100 look into the included libraries)
#
# DESCRIPTION:
# Recursively downloads a website, checks for differences against an older
# version of the website (if present) and searches for text with regular
# expressions if wanted.
#
# REQUIREMENTS:
# wget, date, find, tail, bc, mkdir, ln, ...
#
# BUGS:
# ---
#
# NOTES:
# Tested on:
# - ArchLinux + bash, zsh, ksh, busybox ash & dash
# - FreeBSD 11 + bash, zsh, ksh, dash & busybox ash
# - OS X (10.11.6) + bash (sh), zsh and ksh
# ! (t)csh is NOT supported !
#
# AUTHOR:
# Patrick Neumann, patrick@neumannsland.de
#
# COMPANY:
# (privately)
#
# VERSION:
# 1.0
#
# LINK TO THE MOST CURRENT VERSION:
# (Sorry, I bet, I'm not allowed to publish it over GitHub!)
#
# CREATED:
# 2016-10-18
#
# COPYRIGHT (C):
# 2016 - Patrick Neumann
#
# LICENSE:
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# WARRANTY:
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# NOTE:
# ---
#
# TODO:
# More testing:
# - Debian GNU/Linux 8.x + bash, zsh, ksh, dash & busybox ash
# - Linux Mint 18 (Cinnamon) + bash, zsh, ksh, dash & busybox ash
# - Windows 10 & Ubuntu bash
#
# HISTORY:
# 1.0 - Patrick Neumann - Initial (for the trainers eyes only) release
#
#===============================================================================
#-------------------------------------------------------------------------------
# (t)csh needs too many modifications!
# (different "if" syntax, no conditional command, "set" before var="val",...)
# Worth reading link: http://www.grymoire.com/unix/CshTop10.txt
#-------------------------------------------------------------------------------
# Abort with exit state 1 as soon as the lowercase variable "shell" is
# non-empty. The guard is written as one "test ... && ... && ..." chain instead
# of an "if" so that it does not depend on Bourne-only syntax.
# NOTE(review): presumably relies on (t)csh predefining $shell while the
# supported Bourne-like shells leave it unset - an exported "shell" variable in
# the environment would trigger this exit as well; confirm.
# NOTE(review): the "\!" escapes look like protection against csh history
# expansion - confirm.
test -n "${shell}" \
&& printf "\n\033[01;31;40mERROR: (t)csh is not supported... EXIT\!\!\!\033[00m\n\n" \
&& exit 1
#-------------------------------------------------------------------------------
# Checking for and including libraries.
#-------------------------------------------------------------------------------
# The libraries are expected in the same directory as this script.
# "${0%/*}" only yields that directory if $0 contains a slash; when the script
# is started as "bash web_site_monitor <URL>" the expansion would return the
# script name itself, so fall back to the current directory in that case.
case "${0}" in
  */* ) WSM_SCRIPT_DIR="${0%/*}" ;;
  * )   WSM_SCRIPT_DIR="." ;;
esac
# dash doesn't support arrays!
LIBRARIES="casualscripter_functions.sh"
readonly LIBRARIES="${LIBRARIES} forensicFunctions.sh"
# ${LIBRARIES} is left unquoted on purpose: it is a space separated list.
for library in ${LIBRARIES} ; do
  if ! [ -f "${WSM_SCRIPT_DIR}/${library}" ] ; then
    # The library name is passed as an argument, never as part of the format.
    printf "\n\033[01;31;40mERROR: library \"%s\" missing... EXIT!!!\033[00m\n\n" "${library}"
    exit 2
  fi
  # "." is the POSIX spelling; "source" does not exist in dash. The path always
  # contains a slash here, so no PATH lookup takes place.
  . "${WSM_SCRIPT_DIR}/${library}"
done
#=== CONFIGURATION (static) ====================================================
# Release information (shown by "-v" and at the start of every run):
readonly VERSION="1.0" CREATED="2016-10-18"
# Already set in the library:
#   BIN_WHICH, BIN_UNAME, BIN_BASENAME, BIN_PS, BIN_GREP, BIN_AWK, BIN_SED,
#   BIN_DIRNAME, BIN_TEE, BIN_OPENSSL, BIN_PRINTF
# Additional external tools needed by this script are registered one by one
# through the library function assign_binary.
# NOTE(review): assign_binary is not defined in this file - presumably it
# provides the matching BIN_<NAME> variable used further down; confirm.
for tool in wget date find tail bc mkdir ln rmdir rm ; do
  assign_binary "${tool}"
done
# Changeable defaults (the command line switches may override them):
# The library defaults ECHO_FUNC to "display"; this script prints and logs.
ECHO_FUNC="display_and_log"
# All switches start out disabled.
GET_HELP="no" ; GET_VERSION_ONLY="no"
ENABLE_DEBUGGING="no"
DISABLE_LOGGING="no" ; DISABLE_STDOUT="no"
# No text search unless -E, -F or -G is given. Perl-compatible regular
# expressions are not offered because the grep in macOS does not support them.
GREP_MODE=""
# Fixed settings:
# The digest is deliberately not a command line option: switching it while
# monitoring a website would break the detection of modified content the first
# time after the switch.
DGST="md5"
# Names of the result directories created inside the directory of a run.
DIR_NEW="new_files"
DIR_MV="renamed_files"
DIR_MOD="modified_files"
DIR_RM="deleted_files"
DIR_GREP="search_and_found"
readonly DGST DIR_NEW DIR_MV DIR_MOD DIR_RM DIR_GREP
# Use UTC so that the directory names of different runs stay comparable even if
# the investigator travels into another time zone between two runs.
#
# LAST_RUN: newest directory below the current working directory whose name
# looks like <8 digits>T<6 digits>UTC (the last entry after sorting), or empty
# if there is none. It is determined before the directory of the current run
# is created further down.
# NOTE(review): DARWIN_FIND_REGEXP_TYPE and LINUX_FIND_REGEXP_TYPE are not set
# in this file - presumably the library fills exactly one of them to switch
# find to extended regular expressions (needed for the {8}/{6} intervals).
# They are left unquoted so that an empty value disappears from the command.
readonly LAST_RUN="$( ${BIN_FIND} ${DARWIN_FIND_REGEXP_TYPE} . ${LINUX_FIND_REGEXP_TYPE} -regex '\./[[:digit:]]{8}T[[:digit:]]{6}UTC' -type d | sort -d | ${BIN_TAIL} -n 1 )"
# Start of this run in seconds since the epoch; the directory name and the
# displayed start time below are both derived from this single value.
readonly UTC="$( ${BIN_DATE} -u "+%s" )"
# Fixed timestamps for testing (kept disabled):
#readonly UTC="171320100" # 06.06.1975
#readonly UTC="1271231700" # 14.04.2010
# Directory of this run: ./<YYYYMMDD>T<hhmmss>UTC
# NOTE(review): DATE_DISPLAY is not set in this file - presumably a platform
# specific date(1) option prefix that makes date format the given epoch value.
readonly DIRECTORY="./$( ${BIN_DATE} -u ${DATE_DISPLAY}${UTC} "+%Y%m%dT%H%M%SUTC" )"
# Same instant in the default output format of date; because of "-u" this is
# UTC as well, despite the variable name.
readonly LOCAL_DATE="$( ${BIN_DATE} -u ${DATE_DISPLAY}${UTC} )"
# Create target directory if necessary
# (this happens before the command line is parsed, so every early exit below
# has to remove the directory again; the result of mkdir is not checked)
if ! [ -d "${DIRECTORY}" ] ; then
${BIN_MKDIR} "${DIRECTORY}"
fi
# Hash list of the previous run (only meaningful if LAST_RUN is not empty).
readonly LAST_DGST_FILE="${LAST_RUN}/openssl_dgst-${DGST}.txt"
# Log file of this run.
readonly LOG_FILE="${DIRECTORY}/wsm.log"
# wget defaults for "print and log"; both are adjusted after option parsing.
WGET_OUTPUT="--no-verbose"
WGET_LOGFILE="${LOG_FILE}"
# Hash list written by this run.
readonly DGST_FILE="${DIRECTORY}/openssl_dgst-${DGST}.txt"
#=== FUNCTION ==================================================================
# NAME: usage
# DESCRIPTION: Print the help text (invocation examples and option list)
# to stdout.
# PARAMETER 1: -
#===============================================================================
usage() {
  # One call for the whole text: the format is applied to every argument in
  # turn, an empty argument yields an empty line.
  ${BIN_PRINTF} '%s\n' \
    'BASIC USAGE...' \
    ' $ ./web_site_monitor <URL>' \
    ' OR' \
    ' $ bash web_site_monitor <URL>' \
    ' $ ksh web_site_monitor <URL>' \
    ' $ zsh web_site_monitor <URL>' \
    ' $ dash web_site_monitor <URL>' \
    ' $ busybox ash web_site_monitor <URL>' \
    '' \
    'OPTIONS:' \
    ' -d : enable debugging' \
    ' -h : display usage and exit' \
    ' -n : disable logging (enabled by default)' \
    ' -q : disable stdout (enabled by default)' \
    ' -v : print version and exit' \
    ' -E <regexp> : text search with extended regexp' \
    ' -F <regexp> : text search with fixed string' \
    ' -G <regexp> : text search with basic regexp' \
    ''
}
#=== FUNCTION ==================================================================
# NAME: anti_multi_grep
# DESCRIPTION: Guard for the option parser: only one of "-E", "-F" or "-G"
# (each with its regexp) makes sense per run. If a grep mode
# has already been chosen, the directory of this run is removed,
# the usage is shown and the script ends with exit state 3.
# PARAMETER 1: -
#===============================================================================
anti_multi_grep () {
  # No mode chosen so far: nothing to complain about.
  [ -z "${GREP_MODE}" ] && return 0
  ${BIN_PRINTF} "\033[01;31;40mERROR: to many grep modes given... EXIT!!!\033[00m\n"
  ${BIN_RMDIR} "${DIRECTORY}"
  usage
  exit 3
}
#=== CONFIGURATION (user 1/2) ==================================================
# (-) GNU- and BSD-getopt behave differently
# (+) getopts is more POSIX and system-/shell-portable
#
# The leading ":" of the option string selects the silent error mode of
# getopts: an unknown option arrives as "?" and a missing option argument as
# ":", in both cases with the offending option letter in OPTARG.
#
# The switches are assigned as plain variables inside the loop and frozen once
# afterwards, so that a repeated switch (e.g. "-d -d") is harmless instead of
# tripping over an already read-only variable.
while getopts ":dhnqvE:F:G:" opt ; do
  case "${opt}" in
    d ) ENABLE_DEBUGGING="yes" ;;
    h ) GET_HELP="yes" ;;
    n ) DISABLE_LOGGING="yes" ;;
    q ) DISABLE_STDOUT="yes" ;;
    v ) GET_VERSION_ONLY="yes" ;;
    # anti_multi_grep ends the script if a grep mode was already chosen.
    E ) anti_multi_grep
        GREP_MODE="--extended-regexp"
        REGEXP="${OPTARG}"
        ;;
    F ) anti_multi_grep
        GREP_MODE="--fixed-strings"
        REGEXP="${OPTARG}"
        ;;
    G ) anti_multi_grep
        GREP_MODE="--basic-regexp"
        REGEXP="${OPTARG}"
        ;;
    # Both error exits remove the (still empty) directory of this run, like
    # anti_multi_grep does; a leftover directory would be picked up as the
    # "last run" by the next invocation.
    \? ) ${BIN_PRINTF} "\033[01;31;40mERROR: invalid option: -%s... EXIT!!!\033[00m\n" "${OPTARG}"
         ${BIN_RMDIR} "${DIRECTORY}"
         usage
         exit 4
         ;;
    : ) ${BIN_PRINTF} "\033[01;31;40mERROR: option -%s requires an argument... EXIT!!!\033[00m\n" "${OPTARG}"
        ${BIN_RMDIR} "${DIRECTORY}"
        usage
        exit 5
        ;;
  esac
done
readonly ENABLE_DEBUGGING GET_HELP DISABLE_LOGGING DISABLE_STDOUT \
         GET_VERSION_ONLY GREP_MODE REGEXP
# Drop the parsed options; "${1}" is the URL afterwards.
# NOTE(review): "shift" is a special built-in, so in POSIX shells the LC_ALL
# assignment in front of it stays in effect for the rest of the script, while
# bash in its default mode drops it again - confirm which one is intended.
LC_ALL="C" shift "$(( OPTIND - 1 ))"
#=== CONFIGURATION (output) ====================================================
# Pick the output function and the matching wget settings:
#   (default)  print and log : settings from the static configuration
#   -n         print only    : wget log goes to /dev/null
#   -q         log only      : wget appends to the log file itself
#   -n and -q  silent        : wget is quiet, nothing is logged
case "${DISABLE_LOGGING}" in
  yes )
    ECHO_FUNC="display"
    WGET_LOGFILE="/dev/null"
    ;;
esac
case "${DISABLE_STDOUT}" in
  yes )
    case "${DISABLE_LOGGING}" in
      yes )
        ECHO_FUNC="quiet"
        WGET_OUTPUT="--quiet"
        WGET_LOGFILE="/dev/null"
        ;;
      * )
        ECHO_FUNC="log"
        WGET_OUTPUT="--append-output=${LOG_FILE}"
        ;;
    esac
    ;;
esac
# clear logfile if necessary (only the two logging output functions need it)
case "${ECHO_FUNC}" in
  display_and_log | log ) overwrite "${LOG_FILE}" logfile ;;
esac
#-------------------------------------------------------------------------------
# "-v": print the version to stdout only and exit without error code.
# "-h": print the version and the usage to stdout only and exit without error
#       code ("-v" wins if both are given).
# Otherwise the version line goes through the selected output function and the
# script carries on.
# In both exit cases the directory created for this run is removed again.
# NOTE(review): rmdir only removes an empty directory - if the log file has
# already been created inside it, the directory stays behind; confirm.
#-------------------------------------------------------------------------------
if [ "${GET_VERSION_ONLY}" = "yes" ] ; then
  ${BIN_PRINTF} "\nVersion: ${VERSION} (created: ${CREATED})\n\n"
  ${BIN_RMDIR} "${DIRECTORY}"
  exit 0
fi
# The help text only makes sense on stdout.
if [ "${GET_HELP}" = "yes" ] ; then
  ${BIN_PRINTF} "\nVersion: ${VERSION} (created: ${CREATED})\n\n"
  ${BIN_RMDIR} "${DIRECTORY}"
  usage
  exit 0
fi
${ECHO_FUNC} "\nVersion: ${VERSION} (created: ${CREATED})\n\n"
#=== CONFIGURATION (user 2/2) ==================================================
# The first remaining argument is the URL. It is always handed to printf as an
# argument of a '%s' format: URLs regularly contain "%" (percent-encoding),
# which must not be interpreted as a conversion specification.
if [ -z "${1}" ] ; then
  ${BIN_PRINTF} "\033[01;31;40mERROR: url is missing... EXIT!!!\033[00m\n\n"
  ${BIN_RM} -rf "${DIRECTORY}"
  usage
  exit 6
fi
# The scheme has to be at the very beginning of the URL (anchored with "^").
if ! ${BIN_PRINTF} '%s\n' "${1}" \
     | ${BIN_GREP} --extended-regexp '^(http|ftp)s?://' > /dev/null 2>&1 ; then
  ${BIN_PRINTF} "\033[01;31;40mERROR: url doesn't begin with a wget compatible scheme... EXIT!!!\033[00m\n\n"
  ${BIN_RM} -rf "${DIRECTORY}"
  usage
  exit 7
fi
# wget needs one trailing slash if accessing a subdir on a server:
# any number of trailing slashes is replaced by exactly one.
# (SED_EXT_REGEXP comes from the library and is left unquoted on purpose.)
readonly URL="$( ${BIN_PRINTF} '%s\n' "${1}" \
                 | ${BIN_SED} ${SED_EXT_REGEXP} 's#/*$#/#' )"
# URL without scheme and without the trailing slash, i.e. the directory below
# the directory of this run in which wget stores the download.
readonly SUBDIR="$( ${BIN_PRINTF} '%s\n' "${URL}" \
                    | ${BIN_SED} ${SED_EXT_REGEXP} 's#^(http|ftp)s?://##' \
                    | ${BIN_SED} ${SED_EXT_REGEXP} 's#/$##' )"
#-------------------------------------------------------------------------------
# Development aid: with "-d" the complete effective configuration is written
# through the selected output function, one setting per call.
#-------------------------------------------------------------------------------
case "${ENABLE_DEBUGGING}" in
  yes )
    ${ECHO_FUNC} "Debugging...\n\n"
    # where and when
    ${ECHO_FUNC} "UTC timestamp: ${UTC}\n"
    ${ECHO_FUNC} "Directory name: ${DIRECTORY}\n"
    ${ECHO_FUNC} "Logfile name: ${LOG_FILE}\n"
    ${ECHO_FUNC} "Hashfile name: ${DGST_FILE}\n"
    # switches
    ${ECHO_FUNC} "Get help: ${GET_HELP}\n"
    ${ECHO_FUNC} "Enable debugging: ${ENABLE_DEBUGGING}\n"
    ${ECHO_FUNC} "Disable logging: ${DISABLE_LOGGING}\n"
    ${ECHO_FUNC} "Disable stdout: ${DISABLE_STDOUT}\n"
    # text search
    ${ECHO_FUNC} "Grep mode: ${GREP_MODE}\n"
    ${ECHO_FUNC} "Regexp: ${REGEXP}\n"
    # download
    ${ECHO_FUNC} "URL (last param): ${URL}\n"
    ${ECHO_FUNC} "wget subdir (URL w/o scheme): ${SUBDIR}\n"
    # output
    ${ECHO_FUNC} "Echo function: ${ECHO_FUNC}\n"
    ${ECHO_FUNC} "Wget output: ${WGET_OUTPUT}\n"
    ${ECHO_FUNC} "Wget logfile: ${WGET_LOGFILE}\n\n"
    ;;
esac
#-------------------------------------------------------------------------------
# Tell the user whether an earlier run was found to compare against, and
# when this run was started.
#-------------------------------------------------------------------------------
if [ -n "${LAST_RUN}" ] ; then
  ${ECHO_FUNC} "The directory of the last run is: ${LAST_RUN}\n\n"
else
  ${ECHO_FUNC} "This is the first run.\n\n"
fi
${ECHO_FUNC} "This run was starting at: ${LOCAL_DATE}\n\n"
#-------------------------------------------------------------------------------
# Recursive download with wget.
#-------------------------------------------------------------------------------
${ECHO_FUNC} "Mirroring website...\n\n"
# wget settings used below:
#   - follow links recursively, at most 10 levels deep
#   - 2 tries per file, 10 seconds timeout
#   - robots.txt handling switched off
#   - everything is stored below the directory of this run
#   - a desktop browser user agent is sent
#   - ${WGET_OUTPUT} is one of --no-verbose, --append-output=<logfile> or
#     --quiet, depending on -n/-q
# stderr is merged into stdout and the result is appended to ${WGET_LOGFILE}
# (the log file of this run or /dev/null) by tee, which also prints it.
# The exit state of wget is not evaluated.
# macOS and FreeBSD "tee" only support "-a" and not "--append"
${BIN_WGET} --recursive \
--level=10 \
--tries=2 \
--timeout=10 \
--execute robots=off \
--directory-prefix=${DIRECTORY} \
--user-agent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1' \
${WGET_OUTPUT} \
"${URL}" 2>&1 | ${BIN_TEE} -a "${WGET_LOGFILE}"
${ECHO_FUNC} "\nFiles are stored in ${DIRECTORY}/${SUBDIR}\n\n"
#-------------------------------------------------------------------------------
# Hashing with openssl. (md5sum is not supported by macOS.)
#-------------------------------------------------------------------------------
${ECHO_FUNC} "Calculationg hash values...\n\n"
# One "openssl dgst" call per regular file below the download directory.
# sed replaces the first occurrence of the directory name of this run in every
# output line by ".", so the hash list contains paths that do not depend on
# the run - the comparison with the hash list of an earlier run relies on that.
# The success message below is written without checking the pipeline result.
${BIN_FIND} "${DIRECTORY}/${SUBDIR}" -type f -exec ${BIN_OPENSSL} dgst -"${DGST}" '{}' \; \
| ${BIN_SED} "s#${DIRECTORY}#.#" > "${DGST_FILE}"
${ECHO_FUNC} "The hashfile ${DGST_FILE} was successfully generated\n\n"
#-------------------------------------------------------------------------------
# Verifying hashes against older download.
#
# Every line of a hash list looks like "<DIGEST>(<path>)= <hash>".
#-------------------------------------------------------------------------------
#=== FUNCTION ==================================================================
# NAME: dgst_has_file
# DESCRIPTION: Succeeds if the hash list has an entry for exactly this path.
# The parenthesis and ")= " are part of the needle, so that
# "a.html" is not found inside "a.html.bak".
# PARAMETER 1: path as written in the hash list (e.g. ./host/index.html)
# PARAMETER 2: hash list file
#===============================================================================
dgst_has_file() {
  ${BIN_GREP} --fixed-strings -- "(${1})= " "${2}" > /dev/null 2>&1
}
#=== FUNCTION ==================================================================
# NAME: dgst_has_hash
# DESCRIPTION: Succeeds if the hash list has an entry with this hash value.
# PARAMETER 1: hash value
# PARAMETER 2: hash list file
#===============================================================================
dgst_has_hash() {
  ${BIN_GREP} --fixed-strings -- ")= ${1}" "${2}" > /dev/null 2>&1
}
#=== FUNCTION ==================================================================
# NAME: dgst_first_with_hash
# DESCRIPTION: Prints the first line of the hash list that carries the given
# hash and whose path starts with the given prefix.
# PARAMETER 1: hash value
# PARAMETER 2: hash list file
# PARAMETER 3: path prefix (e.g. "./host/dir/"), empty for "any path"
#===============================================================================
dgst_first_with_hash() {
  ${BIN_GREP} --fixed-strings -- ")= ${1}" "${2}" \
    | ${BIN_GREP} --fixed-strings -- "(${3}" \
    | ${BIN_SED} -n '1p'
}
#=== FUNCTION ==================================================================
# NAME: link_finding
# DESCRIPTION: Creates a symbolic link inside one of the result directories
# of this run, including the needed parent directories.
# PARAMETER 1: result directory name (e.g. ${DIR_MOD})
# PARAMETER 2: link target
# PARAMETER 3: path of the link relative to the result directory
#===============================================================================
link_finding() {
  # macOS and FreeBSD only support "-p" and not "--parents"
  ${BIN_MKDIR} -p "${DIRECTORY}/${1}/$( ${BIN_DIRNAME} "${3}" )"
  # macOS and FreeBSD only support "-s" and not "--symbolic"
  ${BIN_LN} -s "${2}" "${DIRECTORY}/${1}/${3}"
}
if [ -n "${LAST_RUN}" ] ; then
  ${ECHO_FUNC} "Searching for differnences...\n\n"
  # Pass 1: every entry of the last run is looked up in this run.
  # "IFS= read -r" keeps backslashes and surrounding blanks of a path intact.
  while IFS= read -r line ; do
    file="${line#*(}"
    file="${file%)= *}"
    hash="${line#*)= }"
    # Same path with the same hash: unchanged.
    if ${BIN_GREP} --fixed-strings -- "${line}" "${DGST_FILE}" > /dev/null 2>&1 ; then
      continue
    fi
    if dgst_has_file "${file}" "${DGST_FILE}" ; then
      # Same path, different hash: modified.
      link_finding "${DIR_MOD}" "${PWD}/${DIRECTORY#./}/${file#./}" "${file#./}"
      ${ECHO_FUNC} "Symbolic link of modified/exchanged file ${file} was created in ${DIRECTORY}/${DIR_MOD}\n"
    elif dgst_has_hash "${hash}" "${DGST_FILE}" ; then
      # Path is gone but the content still exists: renamed or moved. A file
      # below the old directory is preferred; if there is none, the first file
      # with that content is taken, so the link never points to nothing.
      new_line="$( dgst_first_with_hash "${hash}" "${DGST_FILE}" "$( ${BIN_DIRNAME} "${file}" )/" )"
      if [ -z "${new_line}" ] ; then
        new_line="$( dgst_first_with_hash "${hash}" "${DGST_FILE}" "" )"
      fi
      new_file="${new_line#*(}"
      new_file="${new_file%)= *}"
      link_finding "${DIR_MV}" "${PWD}/${DIRECTORY#./}/${new_file#./}" "${file#./}"
      ${ECHO_FUNC} "Symbolic link of renamed/moved file ${file} was created in ${DIRECTORY}/${DIR_MV}\n"
    else
      # Neither path nor content exist any more: deleted. The link points to
      # the copy that was downloaded by the last run.
      link_finding "${DIR_RM}" "${PWD}/${LAST_RUN#./}/${file#./}" "${file#./}"
      ${ECHO_FUNC} "Symbolic link of deleted file ${file} was created in ${DIRECTORY}/${DIR_RM}\n"
    fi
  done < "${LAST_DGST_FILE}"
  # Pass 2: entries of this run whose path AND content are unknown to the last
  # run are new.
  while IFS= read -r line ; do
    file="${line#*(}"
    file="${file%)= *}"
    hash="${line#*)= }"
    if ! dgst_has_file "${file}" "${LAST_DGST_FILE}" \
       && ! dgst_has_hash "${hash}" "${LAST_DGST_FILE}" ; then
      link_finding "${DIR_NEW}" "${PWD}/${DIRECTORY#./}/${file#./}" "${file#./}"
      ${ECHO_FUNC} "Symbolic link of new added file ${file} was created in ${DIRECTORY}/${DIR_NEW}\n"
    fi
  done < "${DGST_FILE}"
  ${ECHO_FUNC} "... done.\n\n"
fi
#-------------------------------------------------------------------------------
# Text search (if given on the command line).
#-------------------------------------------------------------------------------
#=== FUNCTION ==================================================================
# NAME: link_search_hits
# DESCRIPTION: Reads file names (one per line) from stdin and creates for
# each of them a symbolic link below ${DIRECTORY}/${DIR_GREP},
# mirroring the path of the file inside the directory of this
# run. Reading line by line needs neither an IFS change
# (IFS=$'\n' is not understood by dash) nor unquoted word
# splitting, so blanks and glob characters in names are safe.
# PARAMETER 1: -
#===============================================================================
link_search_hits() {
  while IFS= read -r hit ; do
    [ -n "${hit}" ] || continue
    # "./<directory of this run>/host/file" -> "host/file"
    hit_rel="${hit#./*/}"
    ${BIN_MKDIR} -p "${DIRECTORY}/${DIR_GREP}/$( ${BIN_DIRNAME} "${hit_rel}" )"
    ${BIN_LN} -s "${PWD}/${hit#./}" "${DIRECTORY}/${DIR_GREP}/${hit_rel}"
    # The hit is reported through the output function only, so that -q and -n
    # are respected.
    ${ECHO_FUNC} "Symbolic link of file ${hit} with search hit was created in ${DIRECTORY}/${DIR_GREP}\n"
  done
}
if [ -n "${REGEXP}" ] ; then
  ${ECHO_FUNC} "Processing text search...\n\n"
  # Names of all downloaded files with at least one match. "--" keeps a
  # pattern that starts with "-" from being taken for an option.
  readonly HITS="$( ${BIN_GREP} --recursive \
                                --files-with-matches \
                                ${GREP_MODE} -- "${REGEXP}" \
                                "${DIRECTORY}/${SUBDIR}" )"
  if [ -n "${HITS}" ] ; then
    link_search_hits <<EOF
${HITS}
EOF
  fi
  ${ECHO_FUNC} "\n... done.\n\n"
fi
# Final message through the selected output function, then leave with exit
# state 0 (success).
${ECHO_FUNC} "The script has successfully finished.\n\n"
exit 0