Blame view

get_orcid_data.sh 3.44 KB
b28c1b85e   Paulo Graca   Versioning source...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
  #!/bin/bash
  
  # pgraca: paulo.graca@fccn.pt
  # This script downloads ORCID records from a source list, then processes them and returns the parsed information.
  
  # Print the command-line help text to stdout.
  #
  # The text is emitted with printf rather than a here-document: a `<<EOF`
  # here-document only ends at a line consisting of exactly `EOF`, so an
  # indented terminator would make it swallow everything that follows.
  #
  # Globals:   none
  # Arguments: none (reads $0 to show the script name)
  # Outputs:   help text on stdout
  usage()
  {
    local prog
    prog=$(basename -- "$0")

    printf '%s\n' \
      "Usage: ${prog} --file FILE|--orcid ORCID [options]" \
      "" \
      "This script downloads ORCID records from a source list, then processes them" \
      "and returns the parsed information." \
      "" \
      "Options:" \
      "" \
      "  --file         Input file with ORCIDs to process." \
      "" \
      "  --dir          Target directory to save download data. Default: a randomly" \
      "                 named directory created in /tmp." \
      "" \
      "  --orcid        Orcid to download and process. Multiple separated by comma (,)" \
      "" \
      "  --nodownload   Don't download files." \
      "" \
      "  --noprocess    Don't process files." \
      "" \
      "  --verbose      Output all messages." \
      "" \
      "  --help         Show this help text and exit." \
      "" \
      "" \
      "Examples:" \
      "" \
      "  1. Download the ORCIDs listed in a file and process them." \
      "" \
      "     ${prog} --file /path/to/file.txt" \
      "     OrcID=0000-0002-9081-2728" \
      "     Name=Joao Mendes Moreira" \
      "     ResearcherID=L-3960-2013" \
      "" \
      "  2. Orcid by argument." \
      "" \
      "     ${prog} --orcid \"0000-0002-9081-2728\"" \
      "     OrcID=0000-0002-9081-2728" \
      "     Name=Joao Mendes Moreira" \
      "     ResearcherID=L-3960-2013" \
      ""
  }
  
  # Minimum number of command-line arguments, and the exit status reported
  # when the script is invoked with missing or invalid arguments.
  EXPECTED_ARGS=1
  E_BADARGS=65

  # With no arguments there is nothing to do: show the help text on stderr
  # and report failure, so callers can tell this apart from a successful run.
  if [ $# -lt "$EXPECTED_ARGS" ]; then
    usage >&2
    exit "$E_BADARGS"
  fi
  
  
  # Parse the command-line options into the globals read by the rest of the
  # script.
  #
  # Globals written:
  #   FILE         value of --file
  #   TEMP_DIR     value of --dir
  #   ORCIDS       array built from the comma-separated value of --orcid
  #   source       "local" after --file, "orcid" after --orcid
  #   NO_DOWNLOAD  "yes" after --nodownload
  #   NO_PROCESS   "yes" after --noprocess
  #   VERBOSE      "yes" after --verbose
  # Arguments: the script's command-line arguments
  # Exits:     0 after --help; E_BADARGS when a value-taking option has no value
  parse_args() {
    while [ $# -gt 0 ]; do
      case "$1" in
        --file | --dir | --orcid)
          # These options take a value; fail loudly when it is missing.
          if [ $# -lt 2 ]; then
            echo "$1 requires a value." >&2
            usage >&2
            exit "${E_BADARGS:-65}"
          fi
          case "$1" in
            --file)
              FILE="$2"
              source="local"
              ;;
            --dir)
              TEMP_DIR="$2"
              ;;
            --orcid)
              IFS=',' read -r -a ORCIDS <<< "$2"
              source="orcid"
              ;;
          esac
          # Drop the value here; the option itself is dropped by the shift
          # at the bottom of the loop.
          shift
          ;;
        # Flags take no value, so they must not shift here: doing so would
        # silently discard whichever option follows them.
        --nodownload)
          NO_DOWNLOAD=yes
          ;;
        --noprocess)
          NO_PROCESS=yes
          ;;
        --verbose)
          VERBOSE=yes
          ;;
        --help)
          usage
          exit 0
          ;;
        *)
          echo "Ignoring unknown argument: $1" >&2
          ;;
      esac
      shift
    done
  }

  parse_args "$@"
  # Leave no positional parameters behind, so code further down sees the
  # same (empty) argument list it saw when parsing was done inline.
  shift $#
  
  # Download the orcid-bio record of one ORCID.
  #
  # Arguments: $1 - the ORCID to fetch (e.g. 0000-0002-9081-2728)
  # Outputs:   wget saves the response as <ORCID>.xml in the current directory
  # Returns:   1 when no ORCID is given, otherwise wget's exit status
  #
  # NOTE(review): the v1.2 endpoint is hard-coded; confirm it is still served
  # before relying on this.
  function download_orcid {
      local orcid=$1

      # An empty ORCID would produce a file called ".xml" and a malformed URL.
      if [ -z "$orcid" ]; then
          echo "download_orcid: missing ORCID" >&2
          return 1
      fi

      wget --quiet --output-document="${orcid}.xml" "http://pub.orcid.org/v1.2/${orcid}/orcid-bio"
  }
  
  # An input source is mandatory: $source is only set by --file or --orcid.
  # Report the problem on stderr and exit with a failure status so callers
  # can detect it.
  if [ -z "$source" ]; then
    echo "--file or --orcid is required." >&2
    usage >&2
    exit "${E_BADARGS:-65}"
  fi
  
  # Current script dir (absolute), used to locate parse_orcid.pl and error.log.
  # The lookup runs in a subshell, so the caller's working directory is left
  # alone; CDPATH is cleared so that cd cannot resolve the name elsewhere.
  SCRIPTPATH=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || {
    echo "Cannot determine the script directory." >&2
    exit 1
  }
  
  
  # The script changes directory below, which would change what a relative
  # --file path refers to; anchor it to the invocation directory first.
  case "$FILE" in
    /* | "") ;;
    *) FILE="$PWD/$FILE" ;;
  esac

  # Select the working directory: use --dir when it names an existing
  # directory, otherwise create a fresh, uniquely named one under /tmp.
  if [ ! -d "$TEMP_DIR" ]; then
    if [ -n "$TEMP_DIR" ]; then
      echo "Directory not found: ${TEMP_DIR}; using a generated one instead." >&2
    fi
    TEMP_DIR=$(mktemp -d /tmp/orcid.XXXXXXXXXX) || {
      echo "Cannot create a working directory in /tmp." >&2
      exit 1
    }
    [ -n "$VERBOSE" ] && echo "Created directory: ${TEMP_DIR}"
  fi

  cd -- "$TEMP_DIR" || {
    echo "Cannot enter directory: ${TEMP_DIR}" >&2
    exit 1
  }
  # Keep the absolute form: a relative --dir no longer resolves after the cd.
  TEMP_DIR=$PWD
  
  # Downloading

  # Download every ORCID read from stdin, one per line.
  #
  # Blank lines are skipped, a trailing carriage return (CRLF input) is
  # removed, and a last line without a newline is still processed.
  #
  # Globals:   VERBOSE (read)
  # Arguments: none; ORCIDs come from stdin
  # Outputs:   progress messages on stdout when VERBOSE is set
  download_orcid_list() {
    local orcid
    while IFS= read -r orcid || [ -n "$orcid" ]; do
      orcid=${orcid%$'\r'}
      [ -n "$orcid" ] || continue
      [ -n "$VERBOSE" ] && echo "Downloading... ${orcid}"
      # Downloading ORCIDs
      download_orcid "$orcid"
    done
  }

  if [ -z "$NO_DOWNLOAD" ]; then
    case "$source" in
      local)
        if [ -f "$FILE" ]; then
          [ -n "$VERBOSE" ] && echo "Downloading orcids..."
          download_orcid_list < "$FILE"
        else
          # Say so instead of silently downloading nothing.
          echo "Input file not found: ${FILE}" >&2
        fi
        ;;
      orcid)
        [ -n "$VERBOSE" ] && echo "Downloading orcids..."
        download_orcid_list < <(printf '%s\n' "${ORCIDS[@]}")
        ;;
    esac
  fi
  
  # Processing

  # Run parse_orcid.pl on every non-empty *.xml file in a directory.
  #
  # Globals:   SCRIPTPATH (read; location of parse_orcid.pl and error.log),
  #            VERBOSE (read)
  # Arguments: $1 - directory holding the downloaded *.xml files
  # Outputs:   whatever parse_orcid.pl prints; one line per empty file is
  #            appended to $SCRIPTPATH/error.log
  process_orcid_files() {
    local dir=$1
    local xml_file

    [ -d "$dir" ] || return 0

    for xml_file in "$dir"/*.xml; do
      # With no match the pattern is left as-is; there is nothing to process.
      [ -e "$xml_file" ] || continue

      if [ -s "$xml_file" ]; then
        # if file is not empty
        [ -n "$VERBOSE" ] && echo "Processing $xml_file file..."
        perl "$SCRIPTPATH/parse_orcid.pl" "$xml_file"
      else
        echo "$xml_file is empty" >> "$SCRIPTPATH/error.log"
      fi
    done
  }

  if [ -z "$NO_PROCESS" ]; then
    [ -n "$VERBOSE" ] && echo "Processing orcids..."
    process_orcid_files "$TEMP_DIR"
  fi