# extract_metadata.pl 4.67 KB
#!/usr/bin/perl -w

# Prerequisites: first install the Perl modules JSON::Parse, XML::LibXML and XML::LibXSLT.
# Requires the packages libxml-libxslt-perl, libxml-xpath-perl and libxml-perl.

use Data::Dumper;               # Perl core module
use strict;                     # Good practice
use warnings;                   # Good practice
use JSON::Parse  'json_file_to_perl';
use XML::LibXML;

# (1) Validate the command line: exactly one argument, the path to the JSON
#     file, is required. Diagnostics are written to STDERR and the script
#     exits with a non-zero status, so a failure is never mixed into (or
#     mistaken for) the XML document that is printed on STDOUT.
my $num_args = scalar @ARGV;
if ($num_args != 1) {
    print STDERR "\nUsage: extract_metadata.pl path/to/filename.json\n";
    exit 1;
}


my $filename = $ARGV[0];
unless (-e $filename) {
    print STDERR "\nFile: $filename doesn't exist!\n";
    exit 1;
}

# (2) Parse the JSON file. Every section below reads its data from the
#     top-level "message" member, so stop here with a clear message when the
#     file does not contain such an object: nothing could be extracted.
my $json_fromfile = json_file_to_perl ($filename);
unless (ref($json_fromfile) eq 'HASH' && ref($json_fromfile->{message}) eq 'HASH') {
    print STDERR "\nFile: $filename doesn't contain a \"message\" object!\n";
    exit 1;
}
my $json_base = $json_fromfile->{message};
#print Dumper $json_base;

# Set up the output DOM: an XML 1.0 / UTF-8 document whose root element is
# <crossref>. All later sections append their elements to $root.
my $document = XML::LibXML->createDocument('1.0', 'UTF-8');
my $root     = $document->createElement('crossref');
$document->setDocumentElement($root);

# <subtitles> and <titles>: each wrapper element receives one child per entry
# of the JSON array stored under the child's name ("subtitle" / "title").
# The wrapper is appended to the root even when it ends up with no children.
foreach my $list_spec ( [ 'subtitles', 'subtitle' ], [ 'titles', 'title' ] ) {
    my ($wrapper_name, $entry_name) = @{$list_spec};

    my $wrapper = $document->createElement($wrapper_name);
    foreach my $text (@{$json_base->{$entry_name}}) {
        my $entry     = $document->createElement($entry_name);
        my $text_node = $document->createTextNode($text);
        $entry->addChild($text_node);
        $wrapper->addChild($entry);
    }
    $root->addChild($wrapper);
}

# <issue>, <type>, <volume>: scalar fields, each written to an element named
# after its JSON key, in this order. The element itself is always emitted;
# its text node is only added when the key holds a defined value, so a record
# without e.g. an issue number produces an empty element instead of handing
# undef to createTextNode()/addChild(). This mirrors the defined() guards
# already used for an author's family/given names.
foreach my $field (qw(issue type volume)) {
    my $element = $document->createElement($field);
    my $value   = $json_base->{$field};
    if (defined $value) {
        $element->addChild($document->createTextNode($value));
    }
    $root->addChild($element);
}

# <container-titles>: every entry of the JSON "container-title" array is
# wrapped in its own <container-title> child. The wrapper is appended to the
# root whether or not it received any children.
my $container_titles = $document->createElement('container-titles');
foreach my $name (@{$json_base->{'container-title'}}) {
    my $entry     = $document->createElement('container-title');
    my $text_node = $document->createTextNode($name);
    $entry->addChild($text_node);
    $container_titles->addChild($entry);
}
$root->addChild($container_titles);

# <URL> and <DOI>: scalar fields copied from the JSON keys of the same name,
# in this order. Each element is always emitted; the text node is skipped
# when the key is missing or undefined, so that undef is never passed to
# createTextNode()/addChild() and the element is simply left empty.
foreach my $field (qw(URL DOI)) {
    my $element = $document->createElement($field);
    my $value   = $json_base->{$field};
    if (defined $value) {
        $element->addChild($document->createTextNode($value));
    }
    $root->addChild($element);
}


# <subjects> and <ISSNs>: each wrapper element receives one child per entry
# of the JSON array stored under the child's name ("subject" / "ISSN").
# The wrapper is appended to the root even when it ends up with no children.
foreach my $list_spec ( [ 'subjects', 'subject' ], [ 'ISSNs', 'ISSN' ] ) {
    my ($wrapper_name, $entry_name) = @{$list_spec};

    my $wrapper = $document->createElement($wrapper_name);
    foreach my $text (@{$json_base->{$entry_name}}) {
        my $entry     = $document->createElement($entry_name);
        my $text_node = $document->createTextNode($text);
        $entry->addChild($text_node);
        $wrapper->addChild($entry);
    }
    $root->addChild($wrapper);
}

# <publisher>: text of the JSON "publisher" member. The element is always
# emitted; it is left empty when the member is missing or undefined, instead
# of passing undef on to createTextNode()/addChild().
my $publisher = $document->createElement( 'publisher' );
if (defined $json_base->{publisher}) {
    $publisher->addChild($document->createTextNode($json_base->{publisher}));
}
$root->addChild($publisher);

# <authors>: one <author> per entry of the JSON "author" array. Every
# <author> holds a <family> and a <given> child, in that order; a child is
# left empty when the corresponding name part is not defined for the entry.
my $authors = $document->createElement('authors');
foreach my $person (@{$json_base->{author}}) {
    my $author = $document->createElement('author');

    foreach my $name_part ('family', 'given') {
        my $part_element = $document->createElement($name_part);
        my $part_value   = $person->{$name_part};
        if (defined $part_value) {
            $part_element->addChild($document->createTextNode($part_value));
        }
        $author->addChild($part_element);
    }

    #print Dumper "<affiliation>".$person->{affiliation}."</affiliation>";
    $authors->addChild($author);
}
$root->addChild($authors);


# <issued>: one <date> per entry of the JSON issued -> "date-parts" array.
# Each entry is read positionally: index 0 fills <year>, index 1 <month>,
# index 2 <day>. All three children are always emitted, in that order; a
# child stays empty when its position is absent or undefined.
my $issued = $document->createElement( 'issued' );
foreach my $item (@{$json_base->{issued}->{'date-parts'}}) {
    my $date = $document->createElement( 'date' );

    my @part_names = ('year', 'month', 'day');
    foreach my $index (0 .. $#part_names) {
        my $part = $document->createElement($part_names[$index]);

        # Plain element access ($item->[N]) rather than a one-element array
        # slice (@{$item}[N]): a single scalar is what is wanted here.
        my $value = $item->[$index];
        if (defined $value) {
            $part->addChild($document->createTextNode($value));
        }
        $date->addChild($part);
    }

    $issued->addChild($date);
}
$root->addChild($issued);


# Serialize the finished document and write it to the default output handle.
# The argument to toString() is XML::LibXML's formatting level; 2 is used
# here, and the commented-out line below is the variant with level 0.
my $xml_output = $document->toString(2);
print $xml_output;
#print $document->toString(0);