#!/bin/bash
# get_orcid_data.sh

# pgraca: paulo.graca@fccn.pt
# Downloads ORCID records from a source list, then processes them and returns the parsed information.

# Print the command-line help text.
# Arguments: none; $0 is read only to show the script's own name.
# Outputs:   the help text on stdout.
usage()
{
# Resolve the name once; quoting keeps a script path containing spaces intact.
local prog
prog=$(basename -- "$0")
cat <<EOF
Usage: ${prog} --file FILE|--orcid ORCID [options]
 
this script downloads orcids from a source list and then process it and return the parsed information.
 
Options:
 
  --file         Input file with ORCIDs to process.
 
  --dir          Target directory to save download data. Default will be randomly generated in /tmp.
 
  --orcid        Orcid to download and process. Multiple separated by comma (,)
 
  --nodownload   Don't download files.
 
  --noprocess    Don't process files.

  --verbose      Output all messages.
 
  --help         Show this help and exit.
 
 
Examples:
 
  1. Download an orcid and process it.
 
     ${prog} --file /path/to/file.txt
     OrcID=0000-0002-1001-100X
     Name=Joao
     ResearcherID=L-1000-2000

  2. Orcid by argument.
 
     ${prog} --orcid "0000-0002-1001-100X"
     OrcID=0000-0002-1001-100X
     Name=Joao
     ResearcherID=L-1000-2000
  
EOF
}

# Minimum number of command-line arguments, and the exit status used when
# fewer are supplied.
EXPECTED_ARGS=1
E_BADARGS=65

# Called with too few arguments: show the help text and stop with a failure
# status so calling scripts can tell the run did nothing.
if [ "$#" -lt "$EXPECTED_ARGS" ]; then
  usage
  exit "$E_BADARGS"
fi


# Parse the command-line options into the globals read by the rest of the script.
# Globals written: FILE, TEMP_DIR, ORCIDS (array), source ("local" or "orcid"),
#                  NO_DOWNLOAD, NO_PROCESS, VERBOSE (each set to "yes" when given).
# Arguments: the script's argument list.
# --help prints the usage text and exits 0. Unrecognised arguments are ignored.
parse_args() {
  # Test the argument count rather than "$1" so an empty argument does not
  # end parsing early.
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --file)
        shift
        FILE="$1"
        source="local"
        ;;
      --dir)
        shift
        TEMP_DIR="$1"
        ;;
      --orcid)
        shift
        # Comma-separated list -> array; IFS is changed for this read only.
        IFS=',' read -r -a ORCIDS <<< "${1}"
        source="orcid"
        ;;
      # The flags below take no value: the single shift after esac consumes
      # the flag itself, so the option that follows is still seen.
      --nodownload)
        NO_DOWNLOAD=yes
        ;;
      --noprocess)
        NO_PROCESS=yes
        ;;
      --verbose)
        VERBOSE=yes
        ;;
      --help)
        usage
        exit 0
        ;;
    esac
    shift
  done
}

parse_args "$@"
# Drop the positional parameters, as the previous top-level loop did by
# shifting them away.
set --

# Download the public bio record of one ORCID into "<orcid>.xml" in the
# current directory.
# Arguments: $1 - ORCID identifier.
# Returns:   wget's exit status, or 1 when no identifier was given.
# NOTE(review): the endpoint is the v1.2 public API over plain HTTP - confirm
# it is still served before relying on it.
download_orcid() {
    local orcid=$1
    # An empty identifier would write ".xml" and request a malformed URL.
    if [ -z "$orcid" ]; then
        echo "download_orcid: missing ORCID" >&2
        return 1
    fi
    wget --quiet --output-document="${orcid}.xml" "http://pub.orcid.org/v1.2/${orcid}/orcid-bio"
}

# With processing disabled and no ORCID source (--file / --orcid) there is
# nothing left to do: report it on stderr, show the help text and stop with a
# failure status.
if [ -n "$NO_PROCESS" ] && [ -z "$source" ]; then
  echo "--file or --orcid is required." >&2
  usage
  exit "${E_BADARGS:-65}"
fi

# Absolute directory containing this script. The cd runs in a subshell, so the
# caller's working directory is untouched; quoting keeps paths with spaces
# intact, and cd's output is discarded in case CDPATH makes it print.
SCRIPTPATH=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd) || {
  echo "Cannot determine the script directory" >&2
  exit 1
}


# Make a relative --file path absolute now: the working directory changes
# below, and the download step tests "$FILE" after that change.
case "$FILE" in
  ""|/*) ;;
  *) FILE="$PWD/$FILE" ;;
esac

if [ ! -d "$TEMP_DIR" ]; then
  # No usable --dir was given: fall back to a fresh private directory in /tmp.
  [ -n "$TEMP_DIR" ] && echo "Directory not found: ${TEMP_DIR}; using a temporary one instead." >&2
  TEMP_DIR=$(mktemp -d /tmp/orcid.XXXXXXXXXX) || {
    echo "Cannot create a temporary directory in /tmp" >&2
    exit 1
  }
  [ -n "$VERBOSE" ] && echo "Created directory: ${TEMP_DIR}"
fi
cd "$TEMP_DIR" || {
  echo "Cannot change to directory: ${TEMP_DIR}" >&2
  exit 1
}
# Keep TEMP_DIR absolute so the "$TEMP_DIR"/*.xml glob used by the processing
# step still resolves after the cd above.
TEMP_DIR=$(pwd)

# Downloading
# Fetch every requested ORCID into the current directory unless --nodownload
# was given. The identifiers come from --file (one per line) or from the list
# passed to --orcid; both feed the same loop.
if [ -z "$NO_DOWNLOAD" ]; then
    have_list=
    case "$source" in
        local)
            if [ -f "$FILE" ]; then
                ORCIDS=()
                # -r keeps backslashes literal; the || test keeps a last line
                # that has no trailing newline.
                while read -r ORCID || [ -n "$ORCID" ]; do
                    # Drop a trailing CR (CRLF files) and skip blank lines.
                    ORCID=${ORCID%$'\r'}
                    [ -n "$ORCID" ] && ORCIDS+=("$ORCID")
                done < "$FILE"
                have_list=yes
            else
                echo "Input file not found: ${FILE}" >&2
            fi
            ;;
        orcid)
            have_list=yes
            ;;
    esac

    if [ -n "$have_list" ]; then
        [ -n "$VERBOSE" ] && echo "Downloading orcids..."
        for ORCID in "${ORCIDS[@]}"; do
            [ -n "$VERBOSE" ] && echo "Downloading... ${ORCID}"
            # Downloading ORCIDs; report a failure instead of dropping it.
            download_orcid "$ORCID" || echo "Download failed: ${ORCID}" >&2
        done
    fi
fi

# Processing
# Run parse_orcid.pl (located next to this script) on every non-empty XML file
# in TEMP_DIR unless --noprocess was given. Empty files are recorded in
# error.log in the script directory.
if [ -z "$NO_PROCESS" ]; then
    [ -n "$VERBOSE" ] && echo "Processing orcids..."
    for f in "$TEMP_DIR"/*.xml; do
      # An unmatched glob stays as the literal pattern; skip it rather than
      # logging a file that does not exist as "empty".
      [ -e "$f" ] || continue
      if [ -s "$f" ]; then
          # if file is not empty
          [ -n "$VERBOSE" ] && echo "Processing $f file..."
          # take action on each file. $f store current file name
          perl "$SCRIPTPATH/parse_orcid.pl" "$f"
      else
         echo "$f is empty" >> "$SCRIPTPATH/error.log"
      fi
    done
fi