Commit c5bc8d799fe6853388733d9a1a964783fa6ec013

Authored by fribeiro
1 parent 68c0e2873a
Exists in DSpace52++

Added XOAI.java to modules with changes to Verbose Output. Now it will show the …

…handle before compile. This makes it possible to see which handle crashes the OAI import when an item has invalid characters!
dspace/modules/oai/src/main/java/org/dspace/xoai/app/XOAI.java
... ... @@ -0,0 +1,517 @@
  1 +/**
  2 + * The contents of this file are subject to the license and copyright
  3 + * detailed in the LICENSE and NOTICE files at the root of the source
  4 + * tree and available online at
  5 + *
  6 + * http://www.dspace.org/license/
  7 + */
  8 +package org.dspace.xoai.app;
  9 +
  10 +import com.lyncode.xoai.dataprovider.exceptions.ConfigurationException;
  11 +import com.lyncode.xoai.dataprovider.exceptions.MetadataBindException;
  12 +import com.lyncode.xoai.dataprovider.exceptions.WritingXmlException;
  13 +import com.lyncode.xoai.dataprovider.xml.XmlOutputContext;
  14 +import org.apache.commons.cli.CommandLine;
  15 +import org.apache.commons.cli.CommandLineParser;
  16 +import org.apache.commons.cli.Options;
  17 +import org.apache.commons.cli.PosixParser;
  18 +import org.apache.log4j.LogManager;
  19 +import org.apache.log4j.Logger;
  20 +import org.apache.solr.client.solrj.SolrQuery;
  21 +import org.apache.solr.client.solrj.SolrQuery.ORDER;
  22 +import org.apache.solr.client.solrj.SolrServer;
  23 +import org.apache.solr.client.solrj.SolrServerException;
  24 +import org.apache.solr.common.SolrDocumentList;
  25 +import org.apache.solr.common.SolrInputDocument;
  26 +import org.dspace.authorize.AuthorizeException;
  27 +import org.dspace.authorize.AuthorizeManager;
  28 +import org.dspace.content.*;
  29 +import org.dspace.core.ConfigurationManager;
  30 +import org.dspace.core.Constants;
  31 +import org.dspace.core.Context;
  32 +import org.dspace.storage.rdbms.DatabaseManager;
  33 +import org.dspace.storage.rdbms.TableRowIterator;
  34 +import org.dspace.xoai.exceptions.CompilingException;
  35 +import org.dspace.xoai.services.api.cache.XOAICacheService;
  36 +import org.dspace.xoai.services.api.cache.XOAIItemCacheService;
  37 +import org.dspace.xoai.services.api.cache.XOAILastCompilationCacheService;
  38 +import org.dspace.xoai.services.api.config.ConfigurationService;
  39 +import org.dspace.xoai.services.api.config.XOAIManagerResolver;
  40 +import org.dspace.xoai.services.api.context.ContextService;
  41 +import org.dspace.xoai.services.api.database.CollectionsService;
  42 +import org.dspace.xoai.services.api.solr.SolrServerResolver;
  43 +import org.dspace.xoai.solr.DSpaceSolrSearch;
  44 +import org.dspace.xoai.solr.exceptions.DSpaceSolrException;
  45 +import org.dspace.xoai.solr.exceptions.DSpaceSolrIndexerException;
  46 +import org.springframework.beans.factory.annotation.Autowired;
  47 +import org.springframework.context.annotation.AnnotationConfigApplicationContext;
  48 +
  49 +import javax.xml.stream.XMLStreamException;
  50 +import java.io.ByteArrayOutputStream;
  51 +import java.io.IOException;
  52 +import java.net.ConnectException;
  53 +import java.sql.SQLException;
  54 +import java.text.ParseException;
  55 +import java.util.ArrayList;
  56 +import java.util.Arrays;
  57 +import java.util.Date;
  58 +import java.util.List;
  59 +
  60 +import static com.lyncode.xoai.dataprovider.core.Granularity.Second;
  61 +import static org.dspace.content.Item.find;
  62 +import static org.dspace.xoai.util.ItemUtils.retrieveMetadata;
  63 +
  64 +/**
  65 + * @author Lyncode Development Team <dspace@lyncode.com>
  66 + */
  67 +@SuppressWarnings("deprecation")
  68 +public class XOAI {
  69 + private static Logger log = LogManager.getLogger(XOAI.class);
  70 +
  71 + private Context context;
  72 + private boolean optimize;
  73 + private boolean verbose;
  74 + private boolean clean;
  75 +
  76 + @Autowired
  77 + private SolrServerResolver solrServerResolver;
  78 + @Autowired
  79 + private XOAIManagerResolver xoaiManagerResolver;
  80 + @Autowired
  81 + private ContextService contextService;
  82 + @Autowired
  83 + private XOAILastCompilationCacheService xoaiLastCompilationCacheService;
  84 + @Autowired
  85 + private XOAICacheService xoaiCacheService;
  86 + @Autowired
  87 + private XOAIItemCacheService xoaiItemCacheService;
  88 + @Autowired
  89 + private CollectionsService collectionsService;
  90 +
  91 +
  92 + private static List<String> getFileFormats(Item item) {
  93 + List<String> formats = new ArrayList<String>();
  94 + try {
  95 + for (Bundle b : item.getBundles("ORIGINAL")) {
  96 + for (Bitstream bs : b.getBitstreams()) {
  97 + if (!formats.contains(bs.getFormat().getMIMEType())) {
  98 + formats.add(bs.getFormat().getMIMEType());
  99 + }
  100 + }
  101 + }
  102 + } catch (SQLException ex) {
  103 + log.error(ex.getMessage(), ex);
  104 + }
  105 + return formats;
  106 + }
  107 +
  108 + public XOAI(Context context, boolean optimize, boolean clean, boolean verbose) {
  109 + this.context = context;
  110 + this.optimize = optimize;
  111 + this.clean = clean;
  112 + this.verbose = verbose;
  113 + }
  114 +
  115 + public XOAI(Context ctx, boolean hasOption) {
  116 + context = ctx;
  117 + verbose = hasOption;
  118 + }
  119 +
  120 + private void println(String line) {
  121 + System.out.println(line);
  122 + }
  123 +
  124 + public int index() throws DSpaceSolrIndexerException {
  125 + int result = 0;
  126 + try {
  127 +
  128 + if (clean) {
  129 + clearIndex();
  130 + System.out.println("Using full import.");
  131 + result = this.indexAll();
  132 + } else {
  133 + SolrQuery solrParams = new SolrQuery("*:*")
  134 + .addField("item.lastmodified")
  135 + .addSortField("item.lastmodified", ORDER.desc).setRows(1);
  136 +
  137 + SolrDocumentList results = DSpaceSolrSearch.query(solrServerResolver.getServer(), solrParams);
  138 + if (results.getNumFound() == 0) {
  139 + System.out.println("There are no indexed documents, using full import.");
  140 + result = this.indexAll();
  141 + } else
  142 + result = this.index((Date) results.get(0).getFieldValue("item.lastmodified"));
  143 +
  144 + }
  145 + solrServerResolver.getServer().commit();
  146 +
  147 +
  148 + if (optimize) {
  149 + println("Optimizing Index");
  150 + solrServerResolver.getServer().optimize();
  151 + println("Index optimized");
  152 + }
  153 +
  154 + // Set last compilation date
  155 + xoaiLastCompilationCacheService.put(new Date());
  156 + return result;
  157 + } catch (DSpaceSolrException ex) {
  158 + throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
  159 + } catch (SolrServerException ex) {
  160 + throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
  161 + } catch (IOException ex) {
  162 + throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
  163 + }
  164 + }
  165 +
  166 + private int index(Date last) throws DSpaceSolrIndexerException {
  167 + System.out
  168 + .println("Incremental import. Searching for documents modified after: "
  169 + + last.toString());
  170 + // Index both in_archive items AND withdrawn items. Withdrawn items will be flagged withdrawn
  171 + // (in order to notify external OAI harvesters of their new status)
  172 + String sqlQuery = "SELECT item_id FROM item WHERE (in_archive=TRUE OR withdrawn=TRUE) AND discoverable=TRUE AND last_modified > ?";
  173 + if(DatabaseManager.isOracle()){
  174 + sqlQuery = "SELECT item_id FROM item WHERE (in_archive=1 OR withdrawn=1) AND discoverable=1 AND last_modified > ?";
  175 + }
  176 +
  177 + try {
  178 + TableRowIterator iterator = DatabaseManager
  179 + .query(context,
  180 + sqlQuery,
  181 + new java.sql.Timestamp(last.getTime()));
  182 + return this.index(iterator);
  183 + } catch (SQLException ex) {
  184 + throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
  185 + }
  186 + }
  187 +
  188 + private int indexAll() throws DSpaceSolrIndexerException {
  189 + System.out.println("Full import");
  190 + try {
  191 + // Index both in_archive items AND withdrawn items. Withdrawn items will be flagged withdrawn
  192 + // (in order to notify external OAI harvesters of their new status)
  193 + String sqlQuery = "SELECT item_id FROM item WHERE (in_archive=TRUE OR withdrawn=TRUE) AND discoverable=TRUE";
  194 + if(DatabaseManager.isOracle()){
  195 + sqlQuery = "SELECT item_id FROM item WHERE (in_archive=1 OR withdrawn=1) AND discoverable=1";
  196 + }
  197 +
  198 + TableRowIterator iterator = DatabaseManager.query(context,
  199 + sqlQuery);
  200 + return this.index(iterator);
  201 + } catch (SQLException ex) {
  202 + throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
  203 + }
  204 + }
  205 +
  206 + private int index(TableRowIterator iterator)
  207 + throws DSpaceSolrIndexerException {
  208 + try {
  209 + int i = 0;
  210 + SolrServer server = solrServerResolver.getServer();
  211 + while (iterator.hasNext()) {
  212 + try {
  213 + server.add(this.index(find(context, iterator.next().getIntColumn("item_id"))));
  214 + context.clearCache();
  215 + } catch (SQLException ex) {
  216 + log.error(ex.getMessage(), ex);
  217 + } catch (MetadataBindException e) {
  218 + log.error(e.getMessage(), e);
  219 + } catch (ParseException e) {
  220 + log.error(e.getMessage(), e);
  221 + } catch (XMLStreamException e) {
  222 + log.error(e.getMessage(), e);
  223 + } catch (WritingXmlException e) {
  224 + log.error(e.getMessage(), e);
  225 + }
  226 + i++;
  227 + if (i % 100 == 0) System.out.println(i + " items imported so far...");
  228 + }
  229 + System.out.println("Total: " + i + " items");
  230 + server.commit();
  231 + return i;
  232 + } catch (SQLException ex) {
  233 + throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
  234 + } catch (SolrServerException ex) {
  235 + throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
  236 + } catch (IOException ex) {
  237 + throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
  238 + }
  239 + }
  240 +
  241 + private SolrInputDocument index(Item item) throws SQLException, MetadataBindException, ParseException, XMLStreamException, WritingXmlException {
  242 + SolrInputDocument doc = new SolrInputDocument();
  243 + doc.addField("item.id", item.getID());
  244 + boolean pub = this.isPublic(item);
  245 + doc.addField("item.public", pub);
  246 + String handle = item.getHandle();
  247 + doc.addField("item.handle", handle);
  248 + doc.addField("item.lastmodified", item.getLastModified());
  249 + if (item.getSubmitter() != null) {
  250 + doc.addField("item.submitter", item.getSubmitter().getEmail());
  251 + }
  252 + doc.addField("item.deleted", item.isWithdrawn() ? "true" : "false");
  253 + for (Collection col : item.getCollections())
  254 + doc.addField("item.collections",
  255 + "col_" + col.getHandle().replace("/", "_"));
  256 + for (Community com : collectionsService.flatParentCommunities(item))
  257 + doc.addField("item.communities",
  258 + "com_" + com.getHandle().replace("/", "_"));
  259 +
  260 + Metadatum[] allData = item.getMetadata(Item.ANY, Item.ANY, Item.ANY,
  261 + Item.ANY);
  262 + for (Metadatum dc : allData) {
  263 + String key = "metadata." + dc.schema + "." + dc.element;
  264 + if (dc.qualifier != null) {
  265 + key += "." + dc.qualifier;
  266 + }
  267 + doc.addField(key, dc.value);
  268 + if (dc.authority != null) {
  269 + doc.addField(key + ".authority", dc.authority);
  270 + doc.addField(key + ".confidence", dc.confidence + "");
  271 + }
  272 + }
  273 +
  274 + for (String f : getFileFormats(item)) {
  275 + doc.addField("metadata.dc.format.mimetype", f);
  276 + }
  277 +
  278 + if (verbose) {
  279 + //RCAAP: BEFORE HARVEST SEE ITEM HANDLE
  280 + System.out.print("Item with handle " + handle + "... ");
  281 +
  282 + }
  283 +
  284 + ByteArrayOutputStream out = new ByteArrayOutputStream();
  285 + XmlOutputContext context = XmlOutputContext.emptyContext(out, Second);
  286 + retrieveMetadata(item).write(context);
  287 + context.getWriter().flush();
  288 + context.getWriter().close();
  289 + doc.addField("item.compile", out.toString());
  290 +
  291 +
  292 + //RCAAP: After processing item add index
  293 + if (verbose) {
  294 + //println("Item with handle " + handle + " indexed");
  295 + println("indexed!");
  296 + }
  297 +
  298 +
  299 + return doc;
  300 + }
  301 +
  302 + private boolean isPublic(Item item) {
  303 + boolean pub = false;
  304 + try {
  305 + //Check if READ access allowed on this Item
  306 + pub = AuthorizeManager.authorizeActionBoolean(context, item, Constants.READ);
  307 + } catch (SQLException ex) {
  308 + log.error(ex.getMessage());
  309 + }
  310 + return pub;
  311 + }
  312 +
  313 +
  314 + private static boolean getKnownExplanation(Throwable t) {
  315 + if (t instanceof ConnectException) {
  316 + System.err.println("Solr server ("
  317 + + ConfigurationManager.getProperty("oai", "solr.url")
  318 + + ") is down, turn it on.");
  319 + return true;
  320 + }
  321 +
  322 + return false;
  323 + }
  324 +
  325 + private static boolean searchForReason(Throwable t) {
  326 + if (getKnownExplanation(t))
  327 + return true;
  328 + if (t.getCause() != null)
  329 + return searchForReason(t.getCause());
  330 + return false;
  331 + }
  332 +
  333 + private void clearIndex() throws DSpaceSolrIndexerException {
  334 + try {
  335 + System.out.println("Clearing index");
  336 + solrServerResolver.getServer().deleteByQuery("*:*");
  337 + solrServerResolver.getServer().commit();
  338 + System.out.println("Index cleared");
  339 + } catch (SolrServerException ex) {
  340 + throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
  341 + } catch (IOException ex) {
  342 + throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
  343 + }
  344 + }
  345 +
  346 + private static void cleanCache(XOAIItemCacheService xoaiItemCacheService, XOAICacheService xoaiCacheService) throws IOException {
  347 + System.out.println("Purging cached OAI responses.");
  348 + xoaiItemCacheService.deleteAll();
  349 + xoaiCacheService.deleteAll();
  350 + }
  351 +
  352 + private static final String COMMAND_IMPORT = "import";
  353 + private static final String COMMAND_CLEAN_CACHE = "clean-cache";
  354 + private static final String COMMAND_COMPILE_ITEMS = "compile-items";
  355 + private static final String COMMAND_ERASE_COMPILED_ITEMS = "erase-compiled-items";
  356 +
  357 + public static void main(String[] argv) throws IOException, ConfigurationException {
  358 +
  359 +
  360 + AnnotationConfigApplicationContext applicationContext = new AnnotationConfigApplicationContext(new Class[]{
  361 + BasicConfiguration.class
  362 + });
  363 +
  364 + ConfigurationService configurationService = applicationContext.getBean(ConfigurationService.class);
  365 + XOAICacheService cacheService = applicationContext.getBean(XOAICacheService.class);
  366 + XOAIItemCacheService itemCacheService = applicationContext.getBean(XOAIItemCacheService.class);
  367 +
  368 + Context ctx = null;
  369 +
  370 + try {
  371 + CommandLineParser parser = new PosixParser();
  372 + Options options = new Options();
  373 + options.addOption("c", "clear", false, "Clear index before indexing");
  374 + options.addOption("o", "optimize", false,
  375 + "Optimize index at the end");
  376 + options.addOption("v", "verbose", false, "Verbose output");
  377 + options.addOption("h", "help", false, "Shows some help");
  378 + options.addOption("n", "number", true, "FOR DEVELOPMENT MUST DELETE");
  379 + CommandLine line = parser.parse(options, argv);
  380 +
  381 + String[] validSolrCommands = {COMMAND_IMPORT, COMMAND_CLEAN_CACHE};
  382 + String[] validDatabaseCommands = {COMMAND_CLEAN_CACHE, COMMAND_COMPILE_ITEMS, COMMAND_ERASE_COMPILED_ITEMS};
  383 +
  384 +
  385 + boolean solr = true; // Assuming solr by default
  386 + solr = !("database").equals(configurationService.getProperty("oai", "storage"));
  387 +
  388 +
  389 + boolean run = false;
  390 + if (line.getArgs().length > 0) {
  391 + if (solr) {
  392 + if (Arrays.asList(validSolrCommands).contains(line.getArgs()[0])) {
  393 + run = true;
  394 + }
  395 + } else {
  396 + if (Arrays.asList(validDatabaseCommands).contains(line.getArgs()[0])) {
  397 + run = true;
  398 + }
  399 + }
  400 + }
  401 +
  402 + if (!line.hasOption('h') && run) {
  403 + System.out.println("OAI 2.0 manager action started");
  404 + long start = System.currentTimeMillis();
  405 +
  406 + String command = line.getArgs()[0];
  407 +
  408 + if (COMMAND_IMPORT.equals(command)) {
  409 + ctx = new Context();
  410 + XOAI indexer = new XOAI(ctx,
  411 + line.hasOption('o'),
  412 + line.hasOption('c'),
  413 + line.hasOption('v'));
  414 +
  415 + applicationContext.getAutowireCapableBeanFactory().autowireBean(indexer);
  416 +
  417 + int imported = indexer.index();
  418 + if (imported > 0) cleanCache(itemCacheService, cacheService);
  419 + } else if (COMMAND_CLEAN_CACHE.equals(command)) {
  420 + cleanCache(itemCacheService, cacheService);
  421 + } else if (COMMAND_COMPILE_ITEMS.equals(command)) {
  422 +
  423 + ctx = new Context();
  424 + XOAI indexer = new XOAI(ctx, line.hasOption('v'));
  425 + applicationContext.getAutowireCapableBeanFactory().autowireBean(indexer);
  426 +
  427 + indexer.compile();
  428 +
  429 + cleanCache(itemCacheService, cacheService);
  430 + } else if (COMMAND_ERASE_COMPILED_ITEMS.equals(command)) {
  431 + cleanCompiledItems(itemCacheService);
  432 + cleanCache(itemCacheService, cacheService);
  433 + }
  434 +
  435 + System.out.println("OAI 2.0 manager action ended. It took "
  436 + + ((System.currentTimeMillis() - start) / 1000)
  437 + + " seconds.");
  438 + } else {
  439 + usage();
  440 + }
  441 + } catch (Throwable ex) {
  442 + if (!searchForReason(ex)) {
  443 + ex.printStackTrace();
  444 + }
  445 + log.error(ex.getMessage(), ex);
  446 + }
  447 + finally
  448 + {
  449 + // Abort our context, if still open
  450 + if(ctx!=null && ctx.isValid())
  451 + ctx.abort();
  452 + }
  453 + }
  454 +
  455 + private static void cleanCompiledItems(XOAIItemCacheService itemCacheService) throws IOException {
  456 + System.out.println("Purging compiled items");
  457 + itemCacheService.deleteAll();
  458 + }
  459 +
  460 + private void compile() throws CompilingException {
  461 + ItemIterator iterator;
  462 + try {
  463 + Date last = xoaiLastCompilationCacheService.get();
  464 +
  465 + if (last == null) {
  466 + System.out.println("Retrieving all items to be compiled");
  467 + iterator = Item.findAll(context);
  468 + } else {
  469 + System.out.println("Retrieving items modified after " + last + " to be compiled");
  470 + String query = "SELECT * FROM item WHERE last_modified>?";
  471 + iterator = new ItemIterator(context, DatabaseManager.query(context, query, new java.sql.Date(last.getTime())));
  472 + }
  473 +
  474 + while (iterator.hasNext()) {
  475 + Item item = iterator.next();
  476 + if (verbose) System.out.println("Compiling item with handle: " + item.getHandle());
  477 + xoaiItemCacheService.put(item, retrieveMetadata(item));
  478 + context.clearCache();
  479 + }
  480 +
  481 + xoaiLastCompilationCacheService.put(new Date());
  482 + } catch (SQLException e) {
  483 + throw new CompilingException(e);
  484 + } catch (IOException e) {
  485 + throw new CompilingException(e);
  486 + }
  487 + System.out.println("Items compiled");
  488 + }
  489 +
  490 + private static void usage() {
  491 + boolean solr = true; // Assuming solr by default
  492 + solr = !("database").equals(ConfigurationManager.getProperty("oai", "storage"));
  493 +
  494 + if (solr) {
  495 + System.out.println("OAI Manager Script");
  496 + System.out.println("Syntax: oai <action> [parameters]");
  497 + System.out.println("> Possible actions:");
  498 + System.out.println(" " + COMMAND_IMPORT + " - To import DSpace items into OAI index and cache system");
  499 + System.out.println(" " + COMMAND_CLEAN_CACHE + " - Cleans the OAI cached responses");
  500 + System.out.println("> Parameters:");
  501 + System.out.println(" -o Optimize index after indexing (" + COMMAND_IMPORT + " only)");
  502 + System.out.println(" -c Clear index (" + COMMAND_IMPORT + " only)");
  503 + System.out.println(" -v Verbose output");
  504 + System.out.println(" -h Shows this text");
  505 + } else {
  506 + System.out.println("OAI Manager Script");
  507 + System.out.println("Syntax: oai <action> [parameters]");
  508 + System.out.println("> Possible actions:");
  509 + System.out.println(" " + COMMAND_CLEAN_CACHE + " - Cleans the OAI cached responses");
  510 + System.out.println(" " + COMMAND_COMPILE_ITEMS + " - Compiles all DSpace items");
  511 + System.out.println(" " + COMMAND_ERASE_COMPILED_ITEMS + " - Erase the OAI compiled items");
  512 + System.out.println("> Parameters:");
  513 + System.out.println(" -v Verbose output");
  514 + System.out.println(" -h Shows this text");
  515 + }
  516 + }
  517 +}
... ...