- DaBInImporter: Aktueller Stand
- PDFBox-Bibliothek gegen aktuelle Version ausgetauscht. PDFBox wird zur Auswertung von PDFs für die Suche verwendet. Die vorhandene Version war relativ alt und hatte mit einigen PDFs aus DaBIn
Probleme, möglicherweise aufgrund einer neueren Version des PDF-Formates. Die aktuelle Version von PDFBox scheint diese PDFs problemlos verarbeiten zu können. Zur Integration waren folgende
Änderungen notwendig:
- Einfügen einiger weiterer Bibliotheken (fontbox, bcmail, icu4j, jempbox), die von PDFBox benötigt werden
- Austausch von commons-logging-1.0 durch commons-logging-1.1.1
- Anpassen der Importe in ccm-core/src/com/arsdigita/search/converter/PDFConverter.java. PDFBox ist mittlerweile ein Apache-Projekt, daher wurden die Pakete von org.pdfbox nach org.apache.pdfbox
umbenannt. Die Namen der Klassen und die Interfaces der Klassen sind gleich geblieben.
git-svn-id: https://svn.libreccm.org/ccm/trunk@663 8810af33-2d31-482b-a856-94f89814c4df
master
parent
fbc7372b64
commit
906a804323
|
|
@ -2144,7 +2144,12 @@ public class DaBInImporter extends Program {
|
|||
Term term;
|
||||
term =
|
||||
termsDomain.getTerm(currentProjectsTerm.getUniqueID());
|
||||
term.addObject(project);
|
||||
if (projectDe != null) {
|
||||
term.addObject(projectDe);
|
||||
}
|
||||
if (projectEn != null) {
|
||||
term.addObject(projectEn);
|
||||
}
|
||||
term.save();
|
||||
}
|
||||
|
||||
|
|
@ -2629,13 +2634,19 @@ public class DaBInImporter extends Program {
|
|||
}
|
||||
|
||||
insertIntoAZFolder(publication, publicationsAlpha);
|
||||
Term term = termsDomain.getTerm(publicationTerms.get(Integer.
|
||||
toString(((Publication) publication.getPrimaryInstance()).
|
||||
getYearOfPublication())).getUniqueID());
|
||||
Term term = publicationTerms.get(Integer.toString(((Publication) publication.
|
||||
getPrimaryInstance()).
|
||||
getYearOfPublication()));
|
||||
if (term == null) {
|
||||
term = publicationsTerm;
|
||||
}
|
||||
term.addObject(publication);
|
||||
term = termsDomain.getTerm(term.getUniqueID());
|
||||
if (publicationDe != null) {
|
||||
term.addObject(publicationDe);
|
||||
}
|
||||
if (publicationEn != null) {
|
||||
term.addObject(publicationEn);
|
||||
}
|
||||
term.save();
|
||||
}
|
||||
};
|
||||
|
|
@ -2738,19 +2749,29 @@ public class DaBInImporter extends Program {
|
|||
//publications.addItem(workingPaper);
|
||||
workingPaperMap.put(workingPaperData.getDabinId(), workingPaper);
|
||||
insertIntoAZFolder(workingPaper, publicationsAlpha);
|
||||
WorkingPaper primary = (WorkingPaper) workingPaper.getPrimaryInstance();
|
||||
String yearStr = Integer.toString(primary.getYearOfPublication());
|
||||
WorkingPaper primary = (WorkingPaper) workingPaper.
|
||||
getPrimaryInstance();
|
||||
String yearStr =
|
||||
Integer.toString(primary.getYearOfPublication());
|
||||
Term term = workingPaperTerms.get(yearStr);
|
||||
if (term == null) {
|
||||
System.out.printf(
|
||||
"***WARNING: Term for year '%s' not found. Using basic term.",
|
||||
yearStr);
|
||||
term = workingPapersTerm;
|
||||
}
|
||||
term = termsDomain.getTerm(term.getUniqueID());
|
||||
term.addObject(workingPaper);
|
||||
if (workingPaperDe != null) {
|
||||
term.addObject(workingPaperDe);
|
||||
}
|
||||
if (workingPaperEn != null) {
|
||||
term.addObject(workingPaperEn);
|
||||
}
|
||||
term.save();
|
||||
|
||||
System.out.println("\tOK");
|
||||
|
||||
System.out.print("\tAssigning file...");
|
||||
System.out.print("\tAssigning file...\n ");
|
||||
if (workingPaperData.getFile() == null) {
|
||||
System.out.println("No file found.");
|
||||
} else {
|
||||
|
|
@ -2791,23 +2812,23 @@ public class DaBInImporter extends Program {
|
|||
getPrimaryInstance()).
|
||||
getTitle());
|
||||
if (title.length() > 200) {
|
||||
fsi.setTitle(title.substring(0, 199));
|
||||
fsi.setTitle(title.substring(0, 200));
|
||||
} else {
|
||||
fsi.setTitle(title);
|
||||
}
|
||||
|
||||
String name = String.format("datei_%s",
|
||||
String name = String.format("datei_%s.pdf",
|
||||
((WorkingPaper) workingPaper.
|
||||
getPrimaryInstance()).
|
||||
getName());
|
||||
if (name.length() > 200) {
|
||||
fsi.setName(name.substring(0, 199));
|
||||
} else {
|
||||
fsi.setName(name);
|
||||
name = name.substring(0, 200);
|
||||
}
|
||||
fsi.setName(name);
|
||||
FileAsset file = new FileAsset();
|
||||
file.loadFromFile(workingPaper.getPrimaryInstance().
|
||||
getName(), pdf, "application/octet-stream");
|
||||
file.loadFromFile(name,
|
||||
pdf,
|
||||
"application/pdf");
|
||||
fsi.setFile(file);
|
||||
file.setContentSection(section);
|
||||
fsi.setContentSection(section);
|
||||
|
|
@ -2818,13 +2839,13 @@ public class DaBInImporter extends Program {
|
|||
bundle.setDefaultLanguage("de");
|
||||
|
||||
RelatedLink download = new RelatedLink();
|
||||
download.setTitle("download");
|
||||
download.setTitle("Download");
|
||||
download.setTargetType(Link.INTERNAL_LINK);
|
||||
download.setTargetItem(fsi);
|
||||
download.setLinkOwner(workingPaperDe);
|
||||
|
||||
download = new RelatedLink();
|
||||
download.setTitle("download");
|
||||
download.setTitle("Download");
|
||||
download.setTargetType(Link.INTERNAL_LINK);
|
||||
download.setTargetItem(fsi);
|
||||
download.setLinkOwner(workingPaperEn);
|
||||
|
|
@ -2895,10 +2916,10 @@ public class DaBInImporter extends Program {
|
|||
myPublication.setTargetType(Link.INTERNAL_LINK);
|
||||
myPublication.setTargetItem(workingPaperEn);
|
||||
if (workingPaperEn.getTitle().length() > 180) {
|
||||
myPublication.setTitle(workingPaperDe.getTitle().
|
||||
myPublication.setTitle(workingPaperEn.getTitle().
|
||||
substring(0, 180));
|
||||
} else {
|
||||
myPublication.setTitle(workingPaperDe.getTitle());
|
||||
myPublication.setTitle(workingPaperEn.getTitle());
|
||||
}
|
||||
myPublication.setLinkListName("MyPublications");
|
||||
myPublication.save();
|
||||
|
|
@ -3227,6 +3248,7 @@ public class DaBInImporter extends Program {
|
|||
System.out.printf("Term '%s' does not exist. Creating...\n",
|
||||
token);
|
||||
createTerm(uniqueId, name, termsDomain, prevTerm);
|
||||
term = termsDomain.getTerm(uniqueId);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3329,7 +3351,8 @@ public class DaBInImporter extends Program {
|
|||
}
|
||||
}
|
||||
} catch (SQLException ex) {
|
||||
System.err.println("Query for publication years of working papers failed.");
|
||||
System.err.println(
|
||||
"Query for publication years of working papers failed.");
|
||||
ex.printStackTrace(System.err);
|
||||
}
|
||||
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -20,9 +20,9 @@ package com.arsdigita.search.converter;
|
|||
|
||||
import java.io.InputStream;
|
||||
import java.io.IOException;
|
||||
import org.pdfbox.pdfparser.PDFParser;
|
||||
import org.pdfbox.util.PDFTextStripper;
|
||||
import org.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdfparser.PDFParser;
|
||||
import org.apache.pdfbox.util.PDFTextStripper;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
|
||||
/**
|
||||
* This class provides the mechanism to perform a conversion from
|
||||
|
|
|
|||
Loading…
Reference in New Issue