Blame view

ingester/scripts/extract_metadata.pl 4.58 KB
9d00822b4   Paulo Graça   Initial commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
  #!/usr/bin/perl -w
  
  # Prerequisites: install the Perl modules JSON::Parse, XML::LibXML and XML::LibXSLT.
  # Required system packages: libxml-libxslt-perl, libxml-xpath-perl, libxml-perl.
  
  use Data::Dumper;               # Perl core module
  use strict;                     # Good practice
  use warnings;                   # Good practice
  use JSON::Parse  'json_file_to_perl';
  use XML::LibXML;
  
  # (1) Require exactly one command-line argument: the path to the JSON file.
  # Diagnostics are written to STDERR and the script exits with a non-zero
  # status, so the usage text never ends up in the XML stream on STDOUT and
  # callers can detect the failure from the exit code.
  if (scalar(@ARGV) != 1) {
      print STDERR "
  Usage: extract_metadata.pl path/to/filename.json
  ";
      exit 1;
  }
  
  
  # (2) The single argument must name an existing file.
  my $filename=$ARGV[0];
  unless (-e $filename) {
      print STDERR "
  File: $filename doesn't exist!
  ";
      exit 1;
  }
  
  # Decode the JSON file into a Perl data structure and pick out its "message"
  # member, which holds every field exported below.
  my $json_fromfile = json_file_to_perl ($filename);
  my $json_base = ref($json_fromfile) eq 'HASH' ? $json_fromfile->{message} : undef;
  unless (ref($json_base) eq 'HASH') {
      # Without a "message" object there is nothing to export; report it
      # instead of silently emitting an empty <crossref> skeleton.
      print STDERR "File: $filename has no \"message\" object!\n";
      exit 1;
  }
  #print Dumper $json_base;
  
  # Create the XML document (version 1.0, UTF-8) with <crossref> as its root.
  my $document = XML::LibXML->createDocument( "1.0", "UTF-8" );
  my $root = $document->createElement( 'crossref' );
  $document->setDocumentElement( $root );
  
  # add_text_element($doc, $parent, $name, $text)
  #
  # Creates element <$name> in document $doc, gives it $text as its text
  # content and appends it to $parent. When $text is undef (the JSON key was
  # absent or null) the element is still appended but left empty, so undef is
  # never passed on to createTextNode(). Returns the new element.
  sub add_text_element {
      my ($doc, $parent, $name, $text) = @_;
  
      my $element = $doc->createElement($name);
      $element->addChild($doc->createTextNode($text)) if defined $text;
      $parent->addChild($element);
      return $element;
  }
  
  # add_list_element($doc, $parent, $wrapper_name, $item_name, $values)
  #
  # Creates a wrapper element <$wrapper_name>, fills it with one <$item_name>
  # child per entry of the array reference $values, and appends the wrapper to
  # $parent. Anything that is not an array reference (including undef) yields
  # an empty wrapper. Returns the wrapper element.
  sub add_list_element {
      my ($doc, $parent, $wrapper_name, $item_name, $values) = @_;
  
      my $wrapper = $doc->createElement($wrapper_name);
      my @items = ref($values) eq 'ARRAY' ? @{$values} : ();
      for my $value (@items) {
          add_text_element($doc, $wrapper, $item_name, $value);
      }
      $parent->addChild($wrapper);
      return $wrapper;
  }
  
  # Export the bibliographic fields as children of the root element. The order
  # of the calls determines the order of the elements in the output.
  add_list_element($document, $root, 'subtitles', 'subtitle', $json_base->{subtitle});
  add_list_element($document, $root, 'titles', 'title', $json_base->{title});
  
  add_text_element($document, $root, 'issue', $json_base->{issue});
  add_text_element($document, $root, 'type', $json_base->{type});
  add_text_element($document, $root, 'volume', $json_base->{volume});
  
  add_list_element($document, $root, 'container-titles', 'container-title',
      $json_base->{'container-title'});
  
  add_text_element($document, $root, 'URL', $json_base->{URL});
  add_text_element($document, $root, 'DOI', $json_base->{DOI});
  
  add_list_element($document, $root, 'subjects', 'subject', $json_base->{subject});
  add_list_element($document, $root, 'ISSNs', 'ISSN', $json_base->{ISSN});
  
  add_text_element($document, $root, 'publisher', $json_base->{publisher});
  
  # Each author becomes <author><family/><given/></author>.
  # NOTE: the author's "affiliation" member is currently not exported.
  my $authors = $document->createElement( 'authors' );
  my @author_list = ref($json_base->{author}) eq 'ARRAY' ? @{$json_base->{author}} : ();
  for my $person (@author_list) {
      my $author = $document->createElement( 'author' );
      add_text_element($document, $author, 'family', $person->{'family'});
      add_text_element($document, $author, 'given', $person->{'given'});
      $authors->addChild($author);
  }
  $root->addChild($authors);
  
  
  # Export the publication date(s). Every entry of issued/date-parts is an
  # array read positionally as (year, month, day). Each entry becomes a <date>
  # element that always has <year>, <month> and <day> children; a child stays
  # empty when the matching position is missing or undefined.
  my $issued = $document->createElement( 'issued' );
  my @date_fields = ( 'year', 'month', 'day' );
  for my $parts (@{$json_base->{issued}->{'date-parts'}}) {
      my $date = $document->createElement( 'date' );
  
      for my $i (0 .. $#date_fields) {
          my $field = $document->createElement( $date_fields[$i] );
          my $value = $parts->[$i];
          if (defined $value) {
              $field->addChild($document->createTextNode($value));
          }
          $date->addChild($field);
      }
  
      $issued->addChild($date);
  }
  $root->addChild($issued);
  
  
  # Write the serialized document to STDOUT using format level 2.
  # NOTE(review): assumes toString() on a document returns bytes already encoded
  # in the document's encoding (UTF-8 here) -- confirm in the XML::LibXML docs.
  # binmode() clears any I/O layers on STDOUT so those bytes are not re-encoded.
  binmode(STDOUT);
  print $document->toString(2);
  #print $document->toString(0);