#!/bin/sh

# Public domain notice for all NCBI EDirect scripts is located at:
# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice

# Transport selection. FTP is the default; the flags below switch between
# FTP and HTTPS, and when several are given the last one wins.
useFtp=true
useHttps=false

# Consume leading transport flags. The first argument that is not a
# recognized flag stops the scan and stays in "$@" for later processing.
until [ $# -eq 0 ]
do
  case "$1" in
    -ftp )
      useFtp=true
      useHttps=false
      ;;
    -http | -https )
      useFtp=false
      useHttps=true
      ;;
    * )
      break
      ;;
  esac
  # only reached for a recognized flag, since the default arm breaks out
  shift
done

# DownloadOneByFTP requests one file from ftp.ncbi.nlm.nih.gov by piping its
# name to "nquire -asp", then verifies the result.
#
# Arguments:
#   $1 - section directory under "pubmed" on the server
#   $2 - name of the file to fetch; the checks below expect it to appear
#        under this name in the current directory
#
# Globals written: dir, fl, errs, frst
# Outputs: retry and failure notices on stderr
# Returns: always 0; failure is signalled by the file being absent afterwards
#
# A download counts as successful only when the file exists AND is
# non-empty. A zero-length file is removed and treated like a missing one
# at every stage, so a failed attempt can never suppress a later retry.
DownloadOneByFTP() {

  dir="$1"
  fl="$2"

  echo "$fl" |
  nquire -asp ftp.ncbi.nlm.nih.gov "pubmed" "$dir"

  # retry if file is missing or empty
  if [ ! -s "$fl" ]
  then
    rm -f "$fl"
    sleep 10
    echo "First Failed Download Retry" >&2
    echo "$fl" |
    nquire -asp ftp.ncbi.nlm.nih.gov "pubmed" "$dir"
  fi

  # retry again if still missing or empty
  if [ ! -s "$fl" ]
  then
    rm -f "$fl"
    sleep 10
    echo "Second Failed Download Retry" >&2
    echo "$fl" |
    nquire -asp ftp.ncbi.nlm.nih.gov "pubmed" "$dir"
  fi

  # give up after three attempts, leaving no empty file behind
  if [ ! -s "$fl" ]
  then
    rm -f "$fl"
    echo "Download of '$fl' Failed" >&2
    return 0
  fi

  # verify contents: any output from the decompress/verify pipeline,
  # on stdout or stderr, is treated as an error report
  errs=$( (gunzip -c "$fl" | xtract -mixed -verify) 2>&1 )
  if [ -z "$errs" ]
  then
    return 0
  fi

  # delete and retry one more time
  rm -f "$fl"
  sleep 10
  echo "Invalid Contents Retry" >&2
  echo "$fl" |
  nquire -asp ftp.ncbi.nlm.nih.gov "pubmed" "$dir"

  # the replacement download may itself fail; report it instead of
  # returning silently
  if [ ! -s "$fl" ]
  then
    rm -f "$fl"
    echo "Download of '$fl' Failed" >&2
    return 0
  fi

  errs=$( (gunzip -c "$fl" | xtract -mixed -verify) 2>&1 )
  if [ -n "$errs" ]
  then
    rm -f "$fl"
    frst=$( echo "$errs" | head -n 1 )
    echo "ERROR invalid file '$fl' deleted, errors start with '$frst'" >&2
  fi

  return 0
}

# DownloadOneByHTTPS fetches one file from https://ftp.ncbi.nlm.nih.gov with
# "nquire -get", writing the response to a file of the same name in the
# current directory, then verifies the result.
#
# Arguments:
#   $1 - section directory under "pubmed" on the server
#   $2 - name of the file to fetch, also used as the local output path
#
# Globals written: dir, fl, errs, frst
# Outputs: retry and failure notices on stderr
# Returns: always 0; failure is signalled by the file being absent afterwards
#
# The shell creates "$fl" for the output redirection before nquire runs,
# so the file exists after every attempt, even a failed one. Success is
# therefore judged with -s (exists and non-empty), never -f, and empty
# leftovers are removed so that no zero-length file stays on disk.
DownloadOneByHTTPS() {

  dir="$1"
  fl="$2"

  nquire -get https://ftp.ncbi.nlm.nih.gov pubmed "$dir" "$fl" > "$fl"

  # retry if file is missing or empty
  if [ ! -s "$fl" ]
  then
    rm -f "$fl"
    sleep 10
    echo "First Failed Download Retry" >&2
    nquire -get https://ftp.ncbi.nlm.nih.gov pubmed "$dir" "$fl" > "$fl"
  fi

  # retry again if still missing or empty
  if [ ! -s "$fl" ]
  then
    rm -f "$fl"
    sleep 10
    echo "Second Failed Download Retry" >&2
    nquire -get https://ftp.ncbi.nlm.nih.gov pubmed "$dir" "$fl" > "$fl"
  fi

  # give up after three attempts, leaving no empty file behind
  if [ ! -s "$fl" ]
  then
    rm -f "$fl"
    echo "Download of '$fl' Failed" >&2
    return 0
  fi

  # verify contents: any output from the decompress/verify pipeline,
  # on stdout or stderr, is treated as an error report
  errs=$( (gunzip -c "$fl" | xtract -mixed -verify) 2>&1 )
  if [ -z "$errs" ]
  then
    return 0
  fi

  # delete and retry one more time
  rm -f "$fl"
  sleep 10
  echo "Invalid Contents Retry" >&2
  nquire -get https://ftp.ncbi.nlm.nih.gov pubmed "$dir" "$fl" > "$fl"

  # the replacement download may itself fail; remove the empty file the
  # redirection created and report it
  if [ ! -s "$fl" ]
  then
    rm -f "$fl"
    echo "Download of '$fl' Failed" >&2
    return 0
  fi

  errs=$( (gunzip -c "$fl" | xtract -mixed -verify) 2>&1 )
  if [ -n "$errs" ]
  then
    rm -f "$fl"
    frst=$( echo "$errs" | head -n 1 )
    echo "ERROR invalid file '$fl' deleted, errors start with '$frst'" >&2
  fi

  return 0
}

# download lists the .xml.gz files in one server section and fetches each
# one that passes the skip-if-file-exists filter, using the transport
# selected by the useFtp / useHttps globals.
#
# Arguments:
#   $1 - section directory under "pubmed" on the server
#
# Each file name is echoed to stdout before its download starts.
# Does nothing when neither transport flag is "true".
download() {
  dir="$1"

  # FTP transport: take the directory listing, drop .md5 entries,
  # keep xml.gz entries
  if [ "$useFtp" = true ]
  then
    nquire -lst ftp.ncbi.nlm.nih.gov pubmed "$dir" |
    grep -v ".md5" |
    grep "xml.gz" |
    skip-if-file-exists |
    while read fl
    do
      echo "$fl"
      DownloadOneByFTP "$dir" "$fl"
    done
    # bare return passes the pipeline's status through to the caller
    return
  fi

  # HTTPS transport: pull matching anchor values out of the index page
  if [ "$useHttps" = true ]
  then
    nquire -get https://ftp.ncbi.nlm.nih.gov pubmed "$dir" |
    xtract -pattern a -if a -starts-with pubmed -and a -ends-with ".xml.gz" -element a |
    skip-if-file-exists |
    while read fl
    do
      # one-second pause before each HTTPS download
      sleep 1
      echo "$fl"
      DownloadOneByHTTPS "$dir" "$fl"
    done
  fi
}

# No section named on the command line: fetch the two standard sections.
# Each section gets one repeat pass if download reports a non-zero status.
if [ "$#" -eq 0 ]
then
  for sect in "baseline" "updatefiles"
  do
    if ! download "$sect"
    then
      download "$sect"
    fi
  done
fi

# Otherwise fetch every section named on the command line, in order,
# with the same single repeat pass on a non-zero status.
until [ "$#" -eq 0 ]
do
  sect="$1"
  shift
  if ! download "$sect"
  then
    download "$sect"
  fi
done
