Commit 06333379289f8ea151b97cd4de9ddb4612639a06

Authored by Fernando Ribeiro
1 parent 2cd78c8bcc
Exists in master

Changed to version tika-app-1.13.jar

lib/tika-app-0.10.jap
No preview for this file type
lib/tika-app-0.10.jar
No preview for this file type
lib/tika-app-1.13.jar
No preview for this file type
src/pt/keep/metarepository/core/app/HarvestApp.java
... ... @@ -601,7 +601,7 @@ public class HarvestApp {
601 601 try {
602 602 String baseURL = actualArchive.getHomePage();
603 603 String handle = DatabaseIngester.getLink(metadata.getIdentifierList());
604   - Collection<String> documentsURL = HttpUtility.extractDocumentsLinksFromURL(handle, baseURL, "(pdf|doc)");
  604 + Collection<String> documentsURL = HttpUtility.extractDocumentsLinksFromURL(handle, baseURL, "(pdf|docx?)");
605 605  
606 606 if (documentsURL.size() == 0) {
607 607 logger.info(" FAILED to extract fulltext. No documents were found in the metadata page.");
... ...