prepare.sh
2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/bin/bash
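# Usage example: run this script on every PDF file below the current
# directory (prefixed with `time` to measure the whole run):
# time find . -type f -name '*.pdf' -exec ./prepare.sh {} \;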
# Print every leading argument and drop it, so that only the last argument
# is left in $1 when more than one was given.
while (( $# > 1 )); do
  # printf with a quoted expansion prints the argument verbatim; an unquoted
  # echo would word-split and glob it, and swallow values such as "-n".
  printf '%s\n' "$1"
  shift # past argument or value
done
#######################################
# Print a timestamped log line and highlight every "[ERROR]" tag in red.
# Arguments: the message; several arguments are joined into one string
#            via "$*".
# Outputs:   "<grey timestamp>: <message>" on stdout, using ANSI colors.
#######################################
function mylogger() {
  local -r grey=$'\e[90m' red=$'\e[91m' reset=$'\e[0m'
  local -r tag='[ERROR]'
  # Keep the message as one quoted string: unquoted, "[ERROR]" is a glob
  # that matches files named E, R or O, and runs of spaces collapse.
  local message="$*"
  local stamp
  stamp=$(date '+%F %H:%M:%S:%N')
  # The quoted "$tag" is matched literally, not as a bracket expression.
  message=${message//"$tag"/${red}${tag}}
  # printf '%s' prints the message verbatim; backslashes in it (for example
  # in file names) are not interpreted as escape sequences.
  printf '%s%s:%s %s%s\n' "$grey" "$stamp" "$reset" "$message" "$reset"
}
# ---------------------------------------------------------------------------
# Main flow: build the folder "item_<DOI prefix>.<DOI suffix>" in the current
# directory for the PDF given as $1:
#   1. extract the DOI from the PDF (scripts/extract_pdftext.pl)
#   2. download the record for that DOI from api.crossref.org
#   3. run scripts/extract_metadata.pl and scripts/map_metadata.pl to write
#      dublin_core.xml and metadata_degois.xml
#   4. copy the PDF into the folder and write its name to "contents"
# Every failure is logged through mylogger and ends the script with status 1.
# ---------------------------------------------------------------------------

# Log an [ERROR] line and abort with a non-zero status so that callers can
# tell the file was not processed.
fail() {
  mylogger "[ERROR] $*"
  exit 1
}

file=$1
# The expansion must be quoted: with a missing argument `[ ! -f $file ]`
# collapses to `[ ! -f ]`, which is false, and the script would carry on.
if [ ! -f "$file" ]; then
  fail "couldn't find file - $file"
fi
filename=$(basename -- "$file")

# Absolute directory of this script; the perl helpers are looked up below it.
# CDPATH is cleared so that cd neither prints nor picks another directory.
SCRIPTPATH=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd) \
  || fail "couldn't resolve the script directory"
# NOTE(review): this tests "<dir>ingester" with no "/" in between, exactly as
# the code did before; "<dir>/ingester" may have been intended - confirm.
if [ -d "${SCRIPTPATH}ingester" ]; then
  SCRIPTPATH="${SCRIPTPATH}ingester/"
fi

mylogger "start extracting DOI from PDF - $file"
DOI=$(perl "$SCRIPTPATH/scripts/extract_pdftext.pl" "$file")
# Trim surrounding whitespace so it cannot leak into file names or the URL.
DOI=${DOI#"${DOI%%[![:space:]]*}"}
DOI=${DOI%"${DOI##*[![:space:]]}"}
if [ -z "$DOI" ]; then
  fail "occurred when extracting the DOI, or it's missing"
fi

# First and second "/"-separated fields of the DOI (the same fields that
# `cut -d/ -f1` and `cut -d/ -f2` select); without a "/" both are the DOI.
DOI_prefix=${DOI%%/*}
DOI_suffix=${DOI#*/}
DOI_suffix=${DOI_suffix%%/*}
DOI_FILE=$DOI_prefix.$DOI_suffix

# Private scratch directory instead of predictable names in /tmp; the EXIT
# trap removes it on every exit path, including the error exits.
tmp_dir=$(mktemp -d) || fail "couldn't create a temporary directory"
cleanup() {
  rm -rf -- "$tmp_dir"
}
trap cleanup EXIT
json_file=$tmp_dir/$DOI_FILE.json
xml_file=$tmp_dir/$DOI_FILE.xml

# download json file
#http://api.crossref.org/works/10.4025/reveducfis.v22i3.9976
mylogger "downloading $DOI from crossref"
# --write-out '%{http_code}' reports the status of the last transfer, i.e.
# the final response after -L has followed any redirects.
ERROR_CODE=$(curl -s -L -o "$json_file" -w '%{http_code}' \
  "http://api.crossref.org/works/$DOI")
if [ "$ERROR_CODE" != "200" ]; then
  fail "crossref didn't found DOI - $DOI"
fi

mylogger "extracting metadata from crossref file"
# extract metadata from file
perl "$SCRIPTPATH/scripts/extract_metadata.pl" "$json_file" > "$xml_file" \
  || fail "couldn't extract metadata for $DOI"

# Prepare
item_dir=item_$DOI_FILE
mylogger "creating folder ./item_$DOI_FILE"
mkdir -p -- "$item_dir" || fail "couldn't create folder ./$item_dir"

mylogger "mapping data"
perl "$SCRIPTPATH/scripts/map_metadata.pl" "$xml_file" dc \
  > "$item_dir/dublin_core.xml" \
  || fail "couldn't map metadata (dc) for $DOI"
perl "$SCRIPTPATH/scripts/map_metadata.pl" "$xml_file" degois \
  > "$item_dir/metadata_degois.xml" \
  || fail "couldn't map metadata (degois) for $DOI"

mylogger "copying file"
printf '%s\n' "$filename" > "$item_dir/contents"
cp -prf -- "$file" "$item_dir/$filename" || fail "couldn't copy file - $file"

mylogger "cleaning tempfiles"
cleanup
mylogger "ended......................................"