#!/bin/bash set -euo pipefail # Exit on error, undefined vars, and pipe failures # Configuration readonly BASE_URL="https://download.swissmedicinfo.ch" readonly PAGE_FILE="page.html" readonly OUTPUT_FILE="swissmedic_data.zip" readonly LOG_FILE="curl_output.log" readonly FALLBACK_VIEWSTATEGENERATOR="CA0B0334" # Logging function log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" } # Error handling function error_exit() { log "ERROR: $1" >&2 exit 1 } # Cleanup function cleanup() { local exit_code=$? if [[ $exit_code -ne 0 ]]; then log "Script failed. Check $LOG_FILE and $PAGE_FILE for debugging." fi exit $exit_code } trap cleanup EXIT # Function to extract form field value extract_field() { local field_id="$1" local file="$2" # More robust extraction using grep and sed grep -o "id=\"$field_id\"[^>]*value=\"[^\"]*\"" "$file" 2>/dev/null | \ sed 's/.*value="\([^"]*\)".*/\1/' || true } # Function to validate extracted values validate_field() { local field_name="$1" local field_value="$2" if [[ -z "$field_value" ]]; then log "WARNING: $field_name not found or empty" return 1 fi log "Extracted $field_name: ${field_value:0:50}..." # Show first 50 chars return 0 } # Function to resolve form action URL resolve_form_action() { local page_file="$1" # Extract form action with improved regex local form_action form_action=$(grep -o 'action="[^"]*"' "$page_file" 2>/dev/null | \ sed 's/action="\([^"]*\)"/\1/' | head -n1) if [[ -z "$form_action" ]]; then log "WARNING: Form action not found, using base URL" echo "$BASE_URL/" return fi # Resolve relative URLs case "$form_action" in http*) echo "$form_action" ;; /*) echo "$BASE_URL$form_action" ;; *) echo "$BASE_URL/$form_action" ;; esac } # Function to check if file is a valid ZIP validate_zip_file() { local file="$1" if [[ ! -s "$file" ]]; then return 1 fi # Check file signature (first 4 bytes should be PK for ZIP) if command -v file >/dev/null 2>&1; then file "$file" | grep -qi "zip\|archive" && return 0 fi # Alternative check using hexdump if command -v hexdump >/dev/null 2>&1; then local signature signature=$(hexdump -C "$file" | head -n1 | cut -d' ' -f2-3) [[ "$signature" == "50 4b" ]] && return 0 fi return 1 } # Main execution starts here log "Starting Swissmedic download script" # Step 1: Fetch the page log "Fetching page from $BASE_URL/" if ! curl -s -f --max-time 30 --retry 3 "$BASE_URL/" > "$PAGE_FILE"; then error_exit "Failed to fetch page. Check network connection and URL availability." fi log "Page saved to $PAGE_FILE ($(wc -c < "$PAGE_FILE") bytes)" # Step 2: Validate page contains form if ! grep -q '