- DaBInImporter: Aktueller Stand
- PDFBox-Bibliothek gegen aktuelle Version ausgetauscht. PDFBox wird zur Auswertung von PDFs für die Suche verwendet. Die vorhandene Version war relativ alt und hatte mit einigen PDFs aus DaBIn
Probleme, möglicherweise aufgrund einer neueren Version des PDF-Formates. Die aktuelle Version von PDFBox scheint diese PDFs problemlos verarbeiten zu können. Zur Integration waren folgende
Änderungen notwendig:
- Einfügen einiger weiterer Bibliotheken (fontbox, bcmail, icu4j, jempbox), die von PDFBox benötigt werden
- Austausch von commons-logging-1.0 durch commons-logging-1.1.1
- Anpassen der Importe in ccm-core/src/com/arsdigita/search/converter/PDFConverter.java. PDFBox ist mittlerweile ein Apache-Projekt, daher wurden die Pakete von org.pdfbox nach org.apache.pdfbox
umbenannt. Die Namen der Klassen und die Interfaces der Klassen sind gleich geblieben.
git-svn-id: https://svn.libreccm.org/ccm/trunk@663 8810af33-2d31-482b-a856-94f89814c4df
master
parent
fbc7372b64
commit
906a804323
|
|
@ -2144,7 +2144,12 @@ public class DaBInImporter extends Program {
|
||||||
Term term;
|
Term term;
|
||||||
term =
|
term =
|
||||||
termsDomain.getTerm(currentProjectsTerm.getUniqueID());
|
termsDomain.getTerm(currentProjectsTerm.getUniqueID());
|
||||||
term.addObject(project);
|
if (projectDe != null) {
|
||||||
|
term.addObject(projectDe);
|
||||||
|
}
|
||||||
|
if (projectEn != null) {
|
||||||
|
term.addObject(projectEn);
|
||||||
|
}
|
||||||
term.save();
|
term.save();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2629,13 +2634,19 @@ public class DaBInImporter extends Program {
|
||||||
}
|
}
|
||||||
|
|
||||||
insertIntoAZFolder(publication, publicationsAlpha);
|
insertIntoAZFolder(publication, publicationsAlpha);
|
||||||
Term term = termsDomain.getTerm(publicationTerms.get(Integer.
|
Term term = publicationTerms.get(Integer.toString(((Publication) publication.
|
||||||
toString(((Publication) publication.getPrimaryInstance()).
|
getPrimaryInstance()).
|
||||||
getYearOfPublication())).getUniqueID());
|
getYearOfPublication()));
|
||||||
if (term == null) {
|
if (term == null) {
|
||||||
term = publicationsTerm;
|
term = publicationsTerm;
|
||||||
}
|
}
|
||||||
term.addObject(publication);
|
term = termsDomain.getTerm(term.getUniqueID());
|
||||||
|
if (publicationDe != null) {
|
||||||
|
term.addObject(publicationDe);
|
||||||
|
}
|
||||||
|
if (publicationEn != null) {
|
||||||
|
term.addObject(publicationEn);
|
||||||
|
}
|
||||||
term.save();
|
term.save();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
@ -2738,19 +2749,29 @@ public class DaBInImporter extends Program {
|
||||||
//publications.addItem(workingPaper);
|
//publications.addItem(workingPaper);
|
||||||
workingPaperMap.put(workingPaperData.getDabinId(), workingPaper);
|
workingPaperMap.put(workingPaperData.getDabinId(), workingPaper);
|
||||||
insertIntoAZFolder(workingPaper, publicationsAlpha);
|
insertIntoAZFolder(workingPaper, publicationsAlpha);
|
||||||
WorkingPaper primary = (WorkingPaper) workingPaper.getPrimaryInstance();
|
WorkingPaper primary = (WorkingPaper) workingPaper.
|
||||||
String yearStr = Integer.toString(primary.getYearOfPublication());
|
getPrimaryInstance();
|
||||||
|
String yearStr =
|
||||||
|
Integer.toString(primary.getYearOfPublication());
|
||||||
Term term = workingPaperTerms.get(yearStr);
|
Term term = workingPaperTerms.get(yearStr);
|
||||||
if (term == null) {
|
if (term == null) {
|
||||||
|
System.out.printf(
|
||||||
|
"***WARNING: Term for year '%s' not found. Using basic term.",
|
||||||
|
yearStr);
|
||||||
term = workingPapersTerm;
|
term = workingPapersTerm;
|
||||||
}
|
}
|
||||||
term = termsDomain.getTerm(term.getUniqueID());
|
term = termsDomain.getTerm(term.getUniqueID());
|
||||||
term.addObject(workingPaper);
|
if (workingPaperDe != null) {
|
||||||
|
term.addObject(workingPaperDe);
|
||||||
|
}
|
||||||
|
if (workingPaperEn != null) {
|
||||||
|
term.addObject(workingPaperEn);
|
||||||
|
}
|
||||||
term.save();
|
term.save();
|
||||||
|
|
||||||
System.out.println("\tOK");
|
System.out.println("\tOK");
|
||||||
|
|
||||||
System.out.print("\tAssigning file...");
|
System.out.print("\tAssigning file...\n ");
|
||||||
if (workingPaperData.getFile() == null) {
|
if (workingPaperData.getFile() == null) {
|
||||||
System.out.println("No file found.");
|
System.out.println("No file found.");
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -2791,23 +2812,23 @@ public class DaBInImporter extends Program {
|
||||||
getPrimaryInstance()).
|
getPrimaryInstance()).
|
||||||
getTitle());
|
getTitle());
|
||||||
if (title.length() > 200) {
|
if (title.length() > 200) {
|
||||||
fsi.setTitle(title.substring(0, 199));
|
fsi.setTitle(title.substring(0, 200));
|
||||||
} else {
|
} else {
|
||||||
fsi.setTitle(title);
|
fsi.setTitle(title);
|
||||||
}
|
}
|
||||||
|
|
||||||
String name = String.format("datei_%s",
|
String name = String.format("datei_%s.pdf",
|
||||||
((WorkingPaper) workingPaper.
|
((WorkingPaper) workingPaper.
|
||||||
getPrimaryInstance()).
|
getPrimaryInstance()).
|
||||||
getName());
|
getName());
|
||||||
if (name.length() > 200) {
|
if (name.length() > 200) {
|
||||||
fsi.setName(name.substring(0, 199));
|
name = name.substring(0, 200);
|
||||||
} else {
|
|
||||||
fsi.setName(name);
|
|
||||||
}
|
}
|
||||||
|
fsi.setName(name);
|
||||||
FileAsset file = new FileAsset();
|
FileAsset file = new FileAsset();
|
||||||
file.loadFromFile(workingPaper.getPrimaryInstance().
|
file.loadFromFile(name,
|
||||||
getName(), pdf, "application/octet-stream");
|
pdf,
|
||||||
|
"application/pdf");
|
||||||
fsi.setFile(file);
|
fsi.setFile(file);
|
||||||
file.setContentSection(section);
|
file.setContentSection(section);
|
||||||
fsi.setContentSection(section);
|
fsi.setContentSection(section);
|
||||||
|
|
@ -2818,13 +2839,13 @@ public class DaBInImporter extends Program {
|
||||||
bundle.setDefaultLanguage("de");
|
bundle.setDefaultLanguage("de");
|
||||||
|
|
||||||
RelatedLink download = new RelatedLink();
|
RelatedLink download = new RelatedLink();
|
||||||
download.setTitle("download");
|
download.setTitle("Download");
|
||||||
download.setTargetType(Link.INTERNAL_LINK);
|
download.setTargetType(Link.INTERNAL_LINK);
|
||||||
download.setTargetItem(fsi);
|
download.setTargetItem(fsi);
|
||||||
download.setLinkOwner(workingPaperDe);
|
download.setLinkOwner(workingPaperDe);
|
||||||
|
|
||||||
download = new RelatedLink();
|
download = new RelatedLink();
|
||||||
download.setTitle("download");
|
download.setTitle("Download");
|
||||||
download.setTargetType(Link.INTERNAL_LINK);
|
download.setTargetType(Link.INTERNAL_LINK);
|
||||||
download.setTargetItem(fsi);
|
download.setTargetItem(fsi);
|
||||||
download.setLinkOwner(workingPaperEn);
|
download.setLinkOwner(workingPaperEn);
|
||||||
|
|
@ -2895,10 +2916,10 @@ public class DaBInImporter extends Program {
|
||||||
myPublication.setTargetType(Link.INTERNAL_LINK);
|
myPublication.setTargetType(Link.INTERNAL_LINK);
|
||||||
myPublication.setTargetItem(workingPaperEn);
|
myPublication.setTargetItem(workingPaperEn);
|
||||||
if (workingPaperEn.getTitle().length() > 180) {
|
if (workingPaperEn.getTitle().length() > 180) {
|
||||||
myPublication.setTitle(workingPaperDe.getTitle().
|
myPublication.setTitle(workingPaperEn.getTitle().
|
||||||
substring(0, 180));
|
substring(0, 180));
|
||||||
} else {
|
} else {
|
||||||
myPublication.setTitle(workingPaperDe.getTitle());
|
myPublication.setTitle(workingPaperEn.getTitle());
|
||||||
}
|
}
|
||||||
myPublication.setLinkListName("MyPublications");
|
myPublication.setLinkListName("MyPublications");
|
||||||
myPublication.save();
|
myPublication.save();
|
||||||
|
|
@ -3227,6 +3248,7 @@ public class DaBInImporter extends Program {
|
||||||
System.out.printf("Term '%s' does not exist. Creating...\n",
|
System.out.printf("Term '%s' does not exist. Creating...\n",
|
||||||
token);
|
token);
|
||||||
createTerm(uniqueId, name, termsDomain, prevTerm);
|
createTerm(uniqueId, name, termsDomain, prevTerm);
|
||||||
|
term = termsDomain.getTerm(uniqueId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3329,7 +3351,8 @@ public class DaBInImporter extends Program {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (SQLException ex) {
|
} catch (SQLException ex) {
|
||||||
System.err.println("Query for publication years of working papers failed.");
|
System.err.println(
|
||||||
|
"Query for publication years of working papers failed.");
|
||||||
ex.printStackTrace(System.err);
|
ex.printStackTrace(System.err);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -20,9 +20,9 @@ package com.arsdigita.search.converter;
|
||||||
|
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.pdfbox.pdfparser.PDFParser;
|
import org.apache.pdfbox.pdfparser.PDFParser;
|
||||||
import org.pdfbox.util.PDFTextStripper;
|
import org.apache.pdfbox.util.PDFTextStripper;
|
||||||
import org.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class provides the mechanism to perform a conversion from
|
* This class provides the mechanism to perform a conversion from
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue