gps_csv_to_kml/gps_csv_to_kml

577 lines
22 KiB
Bash

#!/bin/bash
#===============================================================================
# DIRECTORY:
# ---
#
# FILE:
# ./gps_csv_to_kml
#
# BASIC USAGE
# (if a CSV-file "gps.log" is present in the same directory):
# $ ./gps_csv_to_kml
# OR
# $ bash gps_csv_to_kml
# $ ksh gps_csv_to_kml
# $ zsh gps_csv_to_kml
# $ dash gps_csv_to_kml
# $ busybox ash gps_csv_to_kml
# (then the default output file will be something like
# "gps_201508291530450200.kml" and the logfile
# something like "kml_201508291530450200.log")
#
# OPTIONS:
# -h : display usage and exit
# -i : define custom input file (default: gps.log)
# -l : define custom log file
# -n : disable logging (enabled by default)
# -o : define custom output file
# -q : disable stdout (enabled by default)
# -v : print version and exit
#
# DESCRIPTION:
# Expects a CSV-file (up to now only one format is supported!) as input and
# builds a KML-file (XML) for later use with esp. Google Earth or
# https://maps.google.com/.
#
# REQUIREMENTS (Linux):
# coreutils (date, uname, basename, tee, dirname, id, rm and printf), sed,
# procps, grep, gawk or mawk, file, libc-bin (getent) and openssl.
#
# BUGS:
# ---
#
# NOTES:
# Tested on:
# - Debian GNU/Linux (7 & 8) + bash, zsh, ksh, busybox ash & dash
# - Gentoo GNU/Linux + bash, zsh, ksh, busybox ash & dash
# - Linux Mint 17.2 (Cinnamon) + bash, zsh, ksh, busybox ash & dash
# - FreeBSD (9 & 10) + bash, zsh & busybox ash
# - OS X (10.10.5) + bash (sh) and zsh
# ! (t)csh is NOT supported !
#
# AUTHOR:
# Patrick Neumann, patrick@neumannsland.de
#
# COMPANY:
# (privately)
#
# VERSION:
# 1.7
#
# LINK TO THE MOST CURRENT VERSION:
# (Sorry, I bet, I'm not allowed to publish it over GitHub!)
#
# CREATED:
# 2015-09-14
#
# COPYRIGHT (C):
# 2015 - Patrick Neumann
#
# LICENSE:
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# WARRANTY:
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# NOTE:
# 1. I fear, it's slightly exaggerated for the OSF assignment,
# but scripting/programming makes a lot of fun. Sorry!
# 2. Worth readable PDF/Link:
# http://lug.fh-swf.de/vim/vim-bash/StyleGuideShell.en.pdf
# 3. For complex XML-files I would prefer ruby + REXML over (ba)sh.
#
# TODO:
# ---
#
# HISTORY:
# 1.0 - Patrick Neumann - Initial (for the trainers eyes only) release
# 1.1 - Patrick Neumann - added checksumming (stdout/log/kml)
# 1.2 - Patrick Neumann - consideration of FreeBSD (userinfo)
# 1.3 - Patrick Neumann - more POSIX compliance (zsh, ksh, busybox ash & dash)
# 1.4 - Patrick Neumann - added date and time of execution (stdout/log/kml)
# 1.5 - Patrick Neumann - check for logfile and outfile in one function (dry!)
# 1.6 - Patrick Neumann - added os name and shell (stdout/log)
# 1.7 - Patrick Neumann - well tested version!
#
#===============================================================================
#-------------------------------------------------------------------------------
# (t)csh needs too many modifications!
# (different "if" syntax, no conditional command, "set" before var="val",...)
# Worth reading link: http://www.grymoire.com/unix/CshTop10.txt
#
# Guard: abort with exit code 1 as soon as the lowercase variable "shell" is
# non-empty. The plain "printf" is used because PRINTF_BIN is not defined yet
# at this point.
# NOTE(review): presumably "shell" is only set when the file is interpreted by
# (t)csh, and the "\!" escapes are there for csh history expansion - confirm.
# The statement is kept as one "test ... && ... && ..." chain; do not reshape
# it without re-testing under (t)csh.
#-------------------------------------------------------------------------------
test -n "${shell}" \
&& printf "\n\033[01;31;40m(t)csh is not supported... EXIT\!\!\!\033[00m\n\n" \
&& exit 1
#=== CONFIGURATION (static) ====================================================
# Release information of this script.
VERSION="1.7"
CREATED="2015-09-14"
readonly VERSION CREATED
# External tools are looked up once; absolute paths are more secure but less
# portable.
DATE_BIN="$( which date )"
SED_BIN="$( which sed )"
UNAME_BIN="$( which uname )"
BASENAME_BIN="$( which basename )"
PS_BIN="$( which ps )"
GREP_BIN="$( which grep )"
TEE_BIN="$( which tee )"
AWK_BIN="$( which awk )"
FILE_BIN="$( which file )"
GETENT_BIN="$( which getent )"
DIRNAME_BIN="$( which dirname )"
ID_BIN="$( which id )"
OPENSSL_BIN="$( which openssl )"
RM_BIN="$( which rm )"
readonly DATE_BIN SED_BIN UNAME_BIN BASENAME_BIN PS_BIN GREP_BIN TEE_BIN
readonly AWK_BIN FILE_BIN GETENT_BIN DIRNAME_BIN ID_BIN OPENSSL_BIN RM_BIN
# Name of the function used for regular output (may be replaced later on,
# depending on the options "-n" and "-q").
ECHO_FUNC="display_and_log"
# Defaults (the "yes"/"no" switches are changed by the option parser):
DEFAULT_SHELL="bash"
DISABLE_LOGGING="no"
DISABLE_STDOUT="no"
GET_VERSION_ONLY="no"
GET_HELP="no"
# Time of execution, e.g. 2015-08-29T15:30:45+0200.
ISO_8601="$( ${DATE_BIN} "+%Y-%m-%dT%H:%M:%S%z" )"
readonly ISO_8601
# Log, input and output files (the CUSTOM_* values stay empty unless the
# corresponding option is given).
CUSTOM_LOG_FILE=""
TEMP_LOG_FILE="/tmp/gps_csv_to_kml.temp"
DEFAULT_INPUT_FILE="./gps.log"
INPUT_FILE_MIME_TYPE="text/plain"
readonly DEFAULT_INPUT_FILE INPUT_FILE_MIME_TYPE
CUSTOM_INPUT_FILE=""
CUSTOM_OUTPUT_FILE=""
#-------------------------------------------------------------------------------
# Detect operating system name.
#-------------------------------------------------------------------------------
readonly OS_NAME="$( ${UNAME_BIN} -s )"
#-------------------------------------------------------------------------------
# Detect the shell in which we are running.
#-------------------------------------------------------------------------------
# The process list is filtered for the line whose PID column is exactly our
# own PID ($$). A plain 'grep "$$"' would also hit every process whose PID (or
# command name) merely contains these digits and could return several lines.
readonly PROCESS="$( ${BASENAME_BIN} "$( ${PS_BIN} -axco pid,command \
  | ${AWK_BIN} -v pid="$$" '$1 == pid { print $2; }' )" )"
# Why the conditional command should be preferred over test:
# https://google-styleguide.googlecode.com/svn/trunk/shell.xml#Test,_[_and_[[
# and why you don't, if you would support "dash":
# http://mywiki.wooledge.org/Bashism
#
# If the process name equals the script name, the script was started directly
# (not as argument of a shell), so the default shell is assumed.
if [ "${PROCESS}" = "$( ${BASENAME_BIN} "${0}" )" ] ; then
  readonly CURRENT_SHELL="${DEFAULT_SHELL}"
else
  readonly CURRENT_SHELL="${PROCESS}"
fi
# Linux can have all shells and "/bin/echo" has no limitations.
# Darwin (14.5.0) has bash 3.2.57, zsh 5.0.5, ksh 93 and tcsh 6.17.00
# - the builtin echo of ksh and tcsh does not support "-e" and/or "-n"!
# - "/bin/echo" does not support "-e"!
# FreeBSD can have all shells, but "/bin/echo" has the same limitations!
# Solution: use printf instead!
# Worth readable Link: http://hyperpolyglot.org/unix-shells#echo-note
case "${CURRENT_SHELL}" in
  zsh )
    # zsh does not split a string into words separated by spaces by default!
    setopt shwordsplit
    # The "which" of zsh finds the builtin unless "-p" is given!
    readonly PRINTF_BIN="$( which -p printf )"
    ;;
  * )
    readonly PRINTF_BIN="$( which printf )"
    ;;
esac
#=== CONFIGURATION (static) ... continuation ... ===============================
# These values depend on ISO_8601 and SED_BIN, which are defined further up.
readonly ECHO_BIN="echo" # should be the shell builtin!
# The execution timestamp with all "-", ":" and "+" characters removed; it is
# used as the variable part of the default file names below.
readonly SHORT_ISO8601_LIKE="$( ${ECHO_BIN} "${ISO_8601}" \
| ${SED_BIN} 's/[-:+]//g' )"
# Default log and output file, both placed in the current directory.
readonly DEFAULT_LOG_FILE="./kml_${SHORT_ISO8601_LIKE}.log"
readonly DEFAULT_OUTPUT_FILE="./gps_${SHORT_ISO8601_LIKE}.kml"
# The XML fragments below keep "\n" as two literal characters; they are turned
# into newlines when the fragments are used as printf format strings.
# XML declaration:
readonly XMLDECL='<?xml version="1.0" encoding="UTF-8"?>\n'
# XML namespaces:
readonly OPEN_NAMESPACES="<kml xmlns=\"http://www.opengis.net/kml/2.2\" \
xmlns:gx=\"http://www.google.com/kml/ext/2.2\" \
xmlns:kml=\"http://www.opengis.net/kml/2.2\" \
xmlns:atom=\"http://www.w3.org/2005/Atom\">\n"
# XML root:
readonly OPEN_ROOT=" <Document>\n"
# open three XML nodes:
# (written in front of every valid record; contains real line breaks)
readonly OPEN_3NODES=" <Placemark>
<Point>
<coordinates>"
# close three XML nodes:
# (written behind every valid record)
readonly CLOSE_3NODES="</coordinates>
</Point>
</Placemark>\n"
# close XML root and namespaces:
readonly CLOSE_ROOT=" </Document>\n"
readonly CLOSE_NAMESPACES="</kml>"
# Extended regular expressions for one record:
# longitude and latitude: two digits, a dot and 13 to 14 digits,
# third value: five digits.
readonly LONG_REGEXP="[0-9]{2}\.[0-9]{13,14}"
readonly LAT_REGEXP="[0-9]{2}\.[0-9]{13,14}"
readonly NN_REGEXP="[0-9]{5}"
# Complete record "longitude,latitude,third value". The name is spelled
# "REXEXP" (sic) and is referenced under exactly this name by the
# transformation loop.
# NOTE(review): the expression is not anchored, so a record passes as soon as
# it contains a matching part - confirm that this is intended.
readonly NCFI_REXEXP="${LONG_REGEXP},${LAT_REGEXP},${NN_REGEXP}"
# Statistics: processed, accepted and rejected records.
COUNTER=0
VALID=0
INVALID=0
#=== FUNCTION ==================================================================
# NAME: usage
# DESCRIPTION: Print the help text (basic usage and all options) to stdout.
# Every argument of the single printf call below is one output line; an empty
# argument produces an empty line.
# PARAMETER 1: -
#===============================================================================
usage() {
  ${PRINTF_BIN} '%s\n' \
    'BASIC USAGE...' \
    ' (if a CSV-file "gps.log" is present in the same directory):' \
    ' $ ./gps_csv_to_kml' \
    ' OR' \
    ' $ bash gps_csv_to_kml' \
    ' $ ksh gps_csv_to_kml' \
    ' $ zsh gps_csv_to_kml' \
    ' $ dash gps_csv_to_kml' \
    ' $ busybox ash gps_csv_to_kml' \
    ' (then the default output file will be something like' \
    ' "gps_201508291530450200.kml" and the logile' \
    ' something like "kml_201508291530450200.log")' \
    '' \
    'OPTIONS:' \
    ' -h : display usage and exit' \
    ' -i : define custom input file (default: gps.log)' \
    ' -l : define custom log file' \
    ' -n : disable logging (enabled by default)' \
    ' -o : define custom output file' \
    ' -q : disable stdout (enabled by default)' \
    ' -v : print version and exit' \
    ''
}
#=== CONFIGURATION (user) ======================================================
# (-) GNU- and BSD-getopt behave differently
# (+) getopts is more POSIX and system-/shell-portable
#
# The leading ":" of the option string switches getopts to silent mode: an
# unknown option arrives as "?" and a missing argument as ":", in both cases
# with the offending option character in OPTARG.
while getopts ":hnqvi:l:o:" opt ; do
  case "${opt}" in
    h ) GET_HELP="yes" ;;
    n ) DISABLE_LOGGING="yes" ;;
    q ) DISABLE_STDOUT="yes" ;;
    v ) GET_VERSION_ONLY="yes" ;;
    i ) CUSTOM_INPUT_FILE="${OPTARG}" ;;
    l ) CUSTOM_LOG_FILE="${OPTARG}" ;;
    o ) CUSTOM_OUTPUT_FILE="${OPTARG}" ;;
    \? )
      # OPTARG comes from the command line, so it is passed as data ("%s")
      # and never as part of the format string.
      ${PRINTF_BIN} "\n\033[01;31;40mInvalid option: -%s\033[00m\n\n" \
        "${OPTARG}" >&2
      usage
      exit 1
      ;;
    : )
      ${PRINTF_BIN} "\n\033[01;31;40mOption -%s requires an argument.\033[00m\n\n" \
        "${OPTARG}" >&2
      usage
      exit 1
      ;;
  esac
done
#=== FUNCTION ==================================================================
# NAME: error_exit
# DESCRIPTION: Display a red error message surrounded by "ERROR:" and "EXIT!!!"
# on stderr (like the option errors) and terminate the script with exit code 1.
# The message is handed over as data ("%s"), so a "%" or a backslash inside of
# it is printed literally instead of being interpreted by printf.
# PARAMETER 1: message (string)
#===============================================================================
error_exit () {
  ${PRINTF_BIN} "\n\033[01;31;40mERROR: %s... EXIT!!!\033[00m\n\n" "${1}" >&2
  exit 1
}
#-------------------------------------------------------------------------------
# Check if a custom input file is given, is plain-text and readable,
# then use it, otherwise use the default.
#-------------------------------------------------------------------------------
if [ -n "${CUSTOM_INPUT_FILE}" ] ; then
  if ! [ -r "${CUSTOM_INPUT_FILE}" ] ; then
    error_exit "file does not exist/is not readable"
  fi
  # The detected type is compared as ONE quoted word, so an empty or
  # multi-word answer of "file" can not break the comparison.
  # Newer releases of "file" recognise CSV content themselves and then report
  # "text/csv" (some releases: "application/csv") instead of "text/plain".
  case "$( ${FILE_BIN} --brief --mime-type -- "${CUSTOM_INPUT_FILE}" )" in
    "${INPUT_FILE_MIME_TYPE}" | text/csv | application/csv ) ;;
    * ) error_exit "file is not plain-text (csv)" ;;
  esac
  INPUT_FILE="${CUSTOM_INPUT_FILE}"
else
  INPUT_FILE="${DEFAULT_INPUT_FILE}"
fi
#=== FUNCTION ==================================================================
# NAME: check_custom_file
# DESCRIPTION: Check if a custom file is given, has the right file extension and
# we have write rights in the given directory, then use it otherwise use the
# default. The chosen file name is stored in the variable named by parameter 4.
# Terminates the script via error_exit if a check fails.
# PARAMETER 1: custom file (may be empty)
# PARAMETER 2: file extension (without the leading dot)
# PARAMETER 3: file type (only used in the error message)
# PARAMETER 4: variable name
# PARAMETER 5: default file
#===============================================================================
check_custom_file() {
  if [ -n "${1}" ] ; then
    # Literal suffix match: the dot has to be a real dot. (As a regular
    # expression ".log$" would also accept names like "fooXlog".)
    case "${1}" in
      *".${2}" ) ;;
      * ) error_exit "wrong extension of ${3} (.${2})" ;;
    esac
    if ! [ -w "$( ${DIRNAME_BIN} "${1}" )" ] ; then
      error_exit "target directory is not writable"
    fi
    # if you want to use a variable as variablename:
    # (the value itself is expanded by eval, not by this line, so special
    # characters in the file name are safe)
    eval "${4}=\${1}"
  else
    eval "${4}=\${5}"
  fi
}
#=== FUNCTION ==================================================================
# NAME: overwrite
# DESCRIPTION: Check if the custom file already exists and ask for overwriting
# or exit. After the confirmation ("YES") the given file - and only this file -
# is emptied. Nothing happens if the file does not exist.
# PARAMETER 1: custom file
# PARAMETER 2: file type (used in the question and in the error message)
#===============================================================================
overwrite() {
  if [ -e "${1}" ] ; then
    ${PRINTF_BIN} "\n%s does already exist, overwrite? (type YES in UPPER letters and hit return!) : " "${2}"
    read -r answer
    if [ "${answer}" != "YES" ] ; then
      error_exit "move the old ${2} to a save place and try again"
    fi
    # Empty the file that was asked for. (Emptying the logfile regardless of
    # parameter 1 would touch the logfile while the outfile is checked.)
    : > "${1}"
  fi
}
#-------------------------------------------------------------------------------
# Check if a custom log file is given, has the right file extension and we have
# write rights in the given directory, then use it otherwise use the default.
#-------------------------------------------------------------------------------
check_custom_file "${CUSTOM_LOG_FILE}" "log" "logfile" "LOG_FILE" \
  "${DEFAULT_LOG_FILE}"
#-------------------------------------------------------------------------------
# Check if the custom log file already exists and ask for overwriting or exit.
# With "-n" nothing is logged, so an existing logfile is neither asked for nor
# emptied.
#-------------------------------------------------------------------------------
if [ "${DISABLE_LOGGING}" != "yes" ] ; then
  overwrite "${LOG_FILE}" "logfile"
fi
#-------------------------------------------------------------------------------
# Check if a custom output file is given, has the right file extension and we
# have write rights in the given directory, then use it otherwise use the
# default.
#-------------------------------------------------------------------------------
check_custom_file "${CUSTOM_OUTPUT_FILE}" "kml" "outfile" "OUTPUT_FILE" \
  "${DEFAULT_OUTPUT_FILE}"
#-------------------------------------------------------------------------------
# Check if the custom output file already exists and ask for overwriting or
# exit.
#-------------------------------------------------------------------------------
overwrite "${OUTPUT_FILE}" "outfile"
#=== FUNCTION ==================================================================
# NAME: display
# DESCRIPTION: Wrapper for "echo -n -e": print the message to stdout, expand
# backslash escapes like "\n" and "\033", add no trailing newline.
# The message is passed to "%b" instead of being the printf format itself, so
# a "%" inside the message (file names, arguments, ...) is printed literally.
# PARAMETER 1: message (string)
#===============================================================================
display() {
  ${PRINTF_BIN} '%b' "${1}"
}
#=== FUNCTION ==================================================================
# NAME: log
# DESCRIPTION: Wrapper for "echo -n -e" incl. redirection into logfile: append
# the message to LOG_FILE, expand backslash escapes, print nothing to stdout.
# The message is passed to "%b" instead of being the printf format itself, so
# a "%" inside the message is written literally.
# PARAMETER 1: message (string)
#===============================================================================
log() {
  ${PRINTF_BIN} '%b' "${1}" >> "${LOG_FILE}"
}
#=== FUNCTION ==================================================================
# NAME: display_and_log
# DESCRIPTION: Wrapper for "echo -n -e" incl. output to stdout AND redirection
# into logfile (tee appends to LOG_FILE). Backslash escapes are expanded.
# The message is passed to "%b" instead of being the printf format itself, so
# a "%" inside the message is printed literally.
# PARAMETER 1: message (string)
#===============================================================================
display_and_log() {
  ${PRINTF_BIN} '%b' "${1}" | ${TEE_BIN} -a "${LOG_FILE}"
}
#=== FUNCTION ==================================================================
# NAME: quiet
# DESCRIPTION: Wrapper for "echo -n -e" incl. redirection into "nirvana": the
# message is discarded. It is passed to "%b" instead of being the printf format
# itself, so a "%" inside the message can not cause a printf error.
# PARAMETER 1: message (string)
#===============================================================================
quiet() {
  ${PRINTF_BIN} '%b' "${1}" > /dev/null
}
#=== CONFIGURATION (output) ====================================================
# Pick the output function that matches the options "-n" (no logfile) and
# "-q" (no stdout). Without any of them ECHO_FUNC keeps its current value.
if [ "${DISABLE_LOGGING}" = "yes" ] && [ "${DISABLE_STDOUT}" = "yes" ] ; then
  ECHO_FUNC="quiet"
elif [ "${DISABLE_LOGGING}" = "yes" ] ; then
  ECHO_FUNC="display"
elif [ "${DISABLE_STDOUT}" = "yes" ] ; then
  ECHO_FUNC="log"
fi
#=== FUNCTION ==================================================================
# NAME: error
# DESCRIPTION: Hand a red message of the form "ERROR: <message>!!!" (followed
# by a newline escape) to the currently selected output function (ECHO_FUNC).
# PARAMETER 1: message (string)
#===============================================================================
error () {
  # colour on, text, colour off + newline
  set -- "\033[01;31;40m" "ERROR: ${1}!!!" "\033[00m\n"
  "${ECHO_FUNC}" "${1}${2}${3}"
}
#=== FUNCTION ==================================================================
# NAME: hint
# DESCRIPTION: Hand a blue message of the form "HINT: <message>!" (followed by
# a newline escape) to the currently selected output function (ECHO_FUNC).
# PARAMETER 1: message (string)
#===============================================================================
hint () {
  # colour on, text, colour off + newline
  set -- "\033[01;34;40m" "HINT: ${1}!" "\033[00m\n"
  "${ECHO_FUNC}" "${1}${2}${3}"
}
#=== FUNCTION ==================================================================
# NAME: success
# DESCRIPTION: Hand a green message of the form "SUCCESS: <message>!" (followed
# by a newline escape) to the currently selected output function (ECHO_FUNC).
# PARAMETER 1: message (string)
#===============================================================================
success () {
  # colour on, text, colour off + newline
  set -- "\033[01;32;40m" "SUCCESS: ${1}!" "\033[00m\n"
  "${ECHO_FUNC}" "${1}${2}${3}"
}
#-------------------------------------------------------------------------------
# Version line: with "-h" or "-v" it goes to stdout only, otherwise it is sent
# through the selected output function. "-v" ends the script right after it
# (exit code 0).
#-------------------------------------------------------------------------------
if [ "${GET_HELP}" = "yes" ] || [ "${GET_VERSION_ONLY}" = "yes" ] ; then
  ${PRINTF_BIN} "\nVersion: ${VERSION} (created: ${CREATED})\n\n"
else
  ${ECHO_FUNC} "\nVersion: ${VERSION} (created: ${CREATED})\n\n"
fi
case "${GET_VERSION_ONLY}" in
  yes ) exit 0 ;;
esac
#-------------------------------------------------------------------------------
# Help text: with "-h" print it and end the script (exit code 0).
#-------------------------------------------------------------------------------
# makes only sense for stdout
case "${GET_HELP}" in
  yes )
    usage
    exit 0
    ;;
esac
#-------------------------------------------------------------------------------
# OS name and shell.
#-------------------------------------------------------------------------------
${ECHO_FUNC} "System: ${OS_NAME} + ${CURRENT_SHELL}\n\n"
#-------------------------------------------------------------------------------
# Try to reconstruct the script call.
#-------------------------------------------------------------------------------
${ECHO_FUNC} "Cmdline: ${0}"
# "$@" keeps every argument exactly as it was given; an unquoted ${*} would
# split arguments at spaces and expand wildcard characters against the
# current directory.
for part in "$@" ; do
  ${ECHO_FUNC} " ${part}"
done
${ECHO_FUNC} "\n\n"
#-------------------------------------------------------------------------------
# Display Informations of the user (fullname, login, id).
#-------------------------------------------------------------------------------
case "${OS_NAME}" in
  Darwin )
    readonly FULLNAME="$( ${ID_BIN} -F )"
    ;;
  Linux | FreeBSD )
    # Field 5 of the passwd entry, cut at the first comma.
    # LOGNAME may be unset or empty; the lookup then falls back to the name
    # reported by "id -un".
    readonly FULLNAME="$( ${GETENT_BIN} passwd \
        "${LOGNAME:-$( ${ID_BIN} -un )}" \
      | cut -d ":" -f 5 | cut -d "," -f 1 )"
    ;;
  * )
    readonly FULLNAME="---"
    ;;
esac
${ECHO_FUNC} "User: ${FULLNAME} ($( ${ID_BIN} -un ):$( ${ID_BIN} -u ))\n\n"
#-------------------------------------------------------------------------------
# A custom logfile ("-l") together with "-n" doesn't make sense: report it and
# go on without logging.
#-------------------------------------------------------------------------------
if [ -n "${CUSTOM_LOG_FILE}" ] && [ "${DISABLE_LOGGING}" = "yes" ] ; then
  error "Specify a custom logfile but don't want to log? I leave logging off"
fi
#-------------------------------------------------------------------------------
# Write beginning of an well formed XML-file.
# (">" replaces a possibly existing output file)
#-------------------------------------------------------------------------------
${PRINTF_BIN} "${XMLDECL}${OPEN_NAMESPACES}" > "${OUTPUT_FILE}"
#-------------------------------------------------------------------------------
# Insert time of execution (XML file and/or stdout/logfile).
# In the XML file it opens a comment that is closed behind the checksums.
#-------------------------------------------------------------------------------
${ECHO_FUNC} "Date and Time: ${ISO_8601}\n\n"
${PRINTF_BIN} "<!--\nDate and Time: %s\n" "${ISO_8601}" >> "${OUTPUT_FILE}"
#-------------------------------------------------------------------------------
# Insert comment with checksums (XML file and/or stdout/logfile).
#-------------------------------------------------------------------------------
readonly MD5SUM="$( ${OPENSSL_BIN} md5 "${INPUT_FILE}" )"
readonly SHA1SUM="$( ${OPENSSL_BIN} sha1 "${INPUT_FILE}" )"
# The checksum lines come from openssl and are built from the input file name,
# so they are written as data ("%s"); a "%" or a backslash in the file name
# must not be interpreted by printf.
# NOTE(review): a "--" inside the file name would still end up inside the XML
# comment, which XML does not allow - confirm whether this needs handling.
${PRINTF_BIN} "%s\n%s\n-->\n" "${MD5SUM}" "${SHA1SUM}" >> "${OUTPUT_FILE}"
${ECHO_FUNC} "${MD5SUM}\n${SHA1SUM}\n\n"
#-------------------------------------------------------------------------------
# Open the root node.
#-------------------------------------------------------------------------------
${PRINTF_BIN} "${OPEN_ROOT}" >> "${OUTPUT_FILE}"
#-------------------------------------------------------------------------------
# Read CSV-data, transform and write XML-nodes.
# (Field 4 -> longitude, 2 -> latitude and 6 -> height above sea level)
#-------------------------------------------------------------------------------
# Hint: I would always prefer awk over cut from the beginning because of the
# possibility to use multiple chars or regexps in fs in the future by a
# minimum of changes!
#-------------------------------------------------------------------------------
# The records are read line by line ("IFS= read -r"), so a record that contains
# spaces or wildcard characters stays ONE record and is never expanded against
# the current directory. The loop is fed by a here-document and not by a pipe,
# because the counters have to survive the loop (a pipe would run it in a
# subshell in most shells).
while IFS= read -r text ; do
  # awk prints at least ",," for every input line; a completely empty line
  # only shows up for an empty input file and is not a dataset.
  if [ -z "${text}" ] ; then
    continue
  fi
  if ${PRINTF_BIN} '%s\n' "${text}" \
    | ${GREP_BIN} --extended-regexp "${NCFI_REXEXP}" > /dev/null 2>&1 ; then
    # The record is data ("%s"), only the XML fragments are format.
    ${PRINTF_BIN} "${OPEN_3NODES}%s${CLOSE_3NODES}" "${text}" >> "${OUTPUT_FILE}"
    # The "(d)ash" has no unary operators like "++"
    VALID=$(( ${VALID} + 1 ))
  else
    INVALID=$(( ${INVALID} + 1 ))
  fi
  COUNTER=$(( ${COUNTER} + 1 ))
done <<EOF
$( ${AWK_BIN} -F "," '{ OFS=","; print $4,$2,$6; }' "${INPUT_FILE}" )
EOF
#-------------------------------------------------------------------------------
# Close the root node and the namespaces: the end of a valid XML-file.
#-------------------------------------------------------------------------------
{
  ${PRINTF_BIN} "${CLOSE_ROOT}"
  ${PRINTF_BIN} "${CLOSE_NAMESPACES}\n"
} >> "${OUTPUT_FILE}"
#-------------------------------------------------------------------------------
# Statistics (accepted and rejected datasets) for stdout and/or logfile.
#-------------------------------------------------------------------------------
for summary in \
  "${VALID}/${COUNTER} valid datasets transformed" \
  "${INVALID}/${COUNTER} invalid datasets rejected" ; do
  hint "${summary}"
done
${ECHO_FUNC} "\n"
#-------------------------------------------------------------------------------
# Whoever gets here has passed all checks: report the success. :-)
#-------------------------------------------------------------------------------
success "Transformation done"
${ECHO_FUNC} "\n"
#-------------------------------------------------------------------------------
# Remove color codes in logfile.
#-------------------------------------------------------------------------------
# BSDs sed does not know "--in-place"!
# BSDs sed "-i" needs "" and Linux sed "-i" does not!
# Therefore the logfile is filtered into a temporary file and back again.
if [ "${DISABLE_LOGGING}" != "yes" ] ; then
  # The escape character that starts every color code.
  ESCAPE_CHAR="$( ${PRINTF_BIN} "\033" )"
  # mktemp creates a new file with an unpredictable name; a fixed name below
  # /tmp could be prepared by another user (e.g. as a symbolic link).
  if TEMP_LOG_FILE="$( mktemp "${TMPDIR:-/tmp}/gps_csv_to_kml.XXXXXX" )" ; then
    # The logfile is only rewritten if the first pass succeeded, otherwise it
    # would be replaced by an incomplete copy.
    if ${SED_BIN} "s/${ESCAPE_CHAR}\[01;3[0-9];40m//g" "${LOG_FILE}" \
      > "${TEMP_LOG_FILE}" ; then
      ${SED_BIN} "s/${ESCAPE_CHAR}\[00m//g" "${TEMP_LOG_FILE}" \
        > "${LOG_FILE}"
    fi
    ${RM_BIN} -f "${TEMP_LOG_FILE}"
  else
    ${PRINTF_BIN} "no temporary file available, color codes stay in %s\n" \
      "${LOG_FILE}" >&2
  fi
fi
exit 0