prepare.sh 2.04 KB
#!/bin/bash

# Usage: ./prepare.sh <file.pdf>
# To process every PDF found under the current directory:
#   time find . -type f -name '*.pdf' -exec ./prepare.sh {} \;

# Print and discard every positional argument except the last one, so that
# "$1" ends up holding the final argument (read as the input file below).
# (( )) compares numerically; `>` inside [[ ]] compares strings.
while (( $# > 1 ))
do
    # printf + quotes: print the argument verbatim (no word splitting,
    # no globbing, no misreading of values such as "-n").
    printf '%s\n' "$1"
    shift # past argument or value
done

#######################################
# Print a timestamped log line, highlighting every "[ERROR]" tag in red.
# Arguments: the message; multiple arguments are joined with single spaces.
# Outputs:   one line on stdout: grey "YYYY-MM-DD HH:MM:SS:NNNNNNNNN:"
#            prefix, then the message, then a colour reset.
#######################################
function mylogger() {
    local error_color=$'\e[91m'
    # "$*" keeps the message exactly as passed. Left unquoted, the shell
    # would treat "[ERROR]" as a glob pattern (matching a file named E, R
    # or O in the current directory) and collapse runs of whitespace.
    local msg="$*"
    local tagged="${error_color}[ERROR]"
    msg=${msg//\[ERROR\]/$tagged}
    # The message goes through %s so backslashes in it (e.g. in file
    # names) are printed literally.
    printf '\e[90m%s:\e[0m %s\e[0m\n' "$(date +%F\ %H:%M:%S:%N)" "$msg"
}

# The first remaining positional argument is the input file to process.
file=$1
# The path must be quoted: unquoted, a missing argument turns the test into
# `[ ! -f ]` (always false, so the check is skipped) and a path containing
# spaces makes `[` fail with "too many arguments".
if [ ! -f "$file" ]; then
    mylogger "[ERROR] couldn't find file - $file"
    # Report the failure to the caller with a non-zero status.
    exit 1
fi

# File name without its directory part.
filename=$(basename -- "$file")

# Current script dir
# Resolve the directory holding this script to an absolute path. The cd runs
# inside a command substitution (a subshell), so the caller's working
# directory is untouched. Quoting keeps paths with spaces intact, and cd's
# own output (printed when CDPATH is used) is discarded.
SCRIPTPATH=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd) || {
    mylogger "[ERROR] couldn't resolve script directory - $0"
    exit 1
}
# Use the "ingester" subdirectory as the base directory when it exists.
if [ -d "${SCRIPTPATH}/ingester" ]; then
    SCRIPTPATH="${SCRIPTPATH}/ingester"
fi


mylogger "start extracting DOI from PDF - $file"
# Whatever extract_pdftext.pl prints on stdout is taken as the DOI.
# Both paths are quoted so directories or PDF names containing spaces
# reach perl as single arguments.
DOI=$(perl "$SCRIPTPATH/scripts/extract_pdftext.pl" "$file")

# Empty output means the helper failed or found no DOI.
if [ -z "$DOI" ]; then
    mylogger "[ERROR] occurred when extracting the DOI, or it's missing"
    # Report the failure to the caller with a non-zero status.
    exit 1
fi

# Split the DOI on "/" into its first and second fields.
# NOTE: only the second "/"-separated field is kept as the suffix; anything
# after a further "/" in the DOI is not part of DOI_FILE.
# printf with a quoted "$DOI" passes the value through verbatim (no word
# splitting, no glob expansion).
DOI_prefix=$(printf '%s\n' "$DOI" | cut -d'/' -f1)
DOI_suffix=$(printf '%s\n' "$DOI" | cut -d'/' -f2)

# Base name (without "/") used for the temp files and the item folder.
DOI_FILE="${DOI_prefix}.${DOI_suffix}"


# download json file
#http://api.crossref.org/works/10.4025/reveducfis.v22i3.9976
mylogger "downloading $DOI from crossref"

# -L follows redirects, so the status that matters is the one of the final
# response: --write-out '%{http_code}' prints exactly that (and 000 when no
# response was received). Reading the first dumped header line instead
# would report a 301/302 hop as a failure.
# The response body is saved to /tmp/$DOI_FILE.json.
ERROR_CODE=$(curl -s -L -o "/tmp/$DOI_FILE.json" -w '%{http_code}' "http://api.crossref.org/works/$DOI")


if [ "$ERROR_CODE" != "200" ]; then
    mylogger "[ERROR] crossref didn't found DOI - $DOI"
    # Report the failure to the caller with a non-zero status.
    exit 1
fi


mylogger "extracting metadata from crossref file"
# extract metadata from file
# extract_metadata.pl reads the downloaded JSON; its stdout is saved as the
# intermediate XML file. Paths are quoted so spaces or glob characters in
# SCRIPTPATH / DOI_FILE cannot split or expand them.
perl "$SCRIPTPATH/scripts/extract_metadata.pl" "/tmp/$DOI_FILE.json" 1> "/tmp/$DOI_FILE.xml"

# Prepare
# Create the output folder for this item in the current working directory
# (-p: no error if it already exists). The name is quoted so it is always
# passed to mkdir as a single argument.
mylogger "creating folder ./item_$DOI_FILE"
mkdir -p -- "item_$DOI_FILE"

mylogger "mapping data"
# Run map_metadata.pl once per target schema ("dc", then "degois") on the
# intermediate XML and store each output inside the item folder.
# All paths and redirect targets are quoted to avoid word splitting.
perl "$SCRIPTPATH/scripts/map_metadata.pl" "/tmp/$DOI_FILE.xml" dc 1> "item_$DOI_FILE/dublin_core.xml"
perl "$SCRIPTPATH/scripts/map_metadata.pl" "/tmp/$DOI_FILE.xml" degois 1> "item_$DOI_FILE/metadata_degois.xml"

mylogger "copying file"
# The "contents" file holds the name of the copied file.
printf '%s\n' "$filename" > "item_$DOI_FILE/contents"
# Copy the input file into the item folder, preserving its attributes (-p).
# Quoting keeps file names with spaces intact. cp only prompts with -i, so
# no `yes |` feeder is needed.
cp -prf -- "$file" "item_$DOI_FILE/$filename"


mylogger "cleaning tempfiles"
# Remove the intermediate JSON/XML files written under /tmp.
# Both are plain files, so -f is enough (no recursive delete); quoting keeps
# each path a single argument.
rm -f -- "/tmp/$DOI_FILE.json"
rm -f -- "/tmp/$DOI_FILE.xml"

mylogger "ended......................................"