Aktueller Stand des DaBIn-Importers: Alle Publikationstypen und Arbeitspapiere werden importiert. Die Dateien der Arbeitspapiere werden noch nicht korrekt verarbeitet: Die temporäre Datei, die zum Entpacken der PDFs angelegt wird, scheint keine ZIP-Datei zu sein.

git-svn-id: https://svn.libreccm.org/ccm/trunk@651 8810af33-2d31-482b-a856-94f89814c4df
master
jensp 2010-12-16 19:54:44 +00:00
parent b95dcfeb80
commit cc6b05f816
4 changed files with 603 additions and 51 deletions

View File

@ -15,6 +15,7 @@
<ccm:requires name="ccm-sci-types-organization" version="6.6.0" relation="ge"/> <ccm:requires name="ccm-sci-types-organization" version="6.6.0" relation="ge"/>
<ccm:requires name="ccm-sci-publications" version="6.6.0" relation="ge"/> <ccm:requires name="ccm-sci-publications" version="6.6.0" relation="ge"/>
<ccm:requires name="ccm-cms-assets-relatedlink" version="6.6.0" relation="ge"/> <ccm:requires name="ccm-cms-assets-relatedlink" version="6.6.0" relation="ge"/>
<ccm:requires name="ccm-cms-types-filestorageitem" version="6.6.0" relation="ge"/>
</ccm:dependencies> </ccm:dependencies>
<ccm:contacts> <ccm:contacts>
<ccm:contact uri="http://www.pwi.uni-bremen.de" type="website"/> <ccm:contact uri="http://www.pwi.uni-bremen.de" type="website"/>

View File

@ -21,13 +21,19 @@ package com.arsdigita.cms.dabin;
import com.arsdigita.cms.ContentBundle; import com.arsdigita.cms.ContentBundle;
import com.arsdigita.cms.ContentSection; import com.arsdigita.cms.ContentSection;
import com.arsdigita.cms.FileAsset;
import com.arsdigita.cms.Folder; import com.arsdigita.cms.Folder;
import com.arsdigita.cms.ItemCollection; import com.arsdigita.cms.ItemCollection;
import com.arsdigita.cms.contentassets.RelatedLink; import com.arsdigita.cms.contentassets.RelatedLink;
import com.arsdigita.cms.contenttypes.Address; import com.arsdigita.cms.contenttypes.Address;
import com.arsdigita.cms.contenttypes.ArticleInCollectedVolume;
import com.arsdigita.cms.contenttypes.ArticleInJournal;
import com.arsdigita.cms.contenttypes.CollectedVolume;
import com.arsdigita.cms.contenttypes.Contact; import com.arsdigita.cms.contenttypes.Contact;
import com.arsdigita.cms.contenttypes.FileStorageItem;
import com.arsdigita.cms.contenttypes.GenericContactEntry; import com.arsdigita.cms.contenttypes.GenericContactEntry;
import com.arsdigita.cms.contenttypes.GenericPerson; import com.arsdigita.cms.contenttypes.GenericPerson;
import com.arsdigita.cms.contenttypes.GreyLiterature;
import com.arsdigita.cms.contenttypes.Link; import com.arsdigita.cms.contenttypes.Link;
import com.arsdigita.cms.contenttypes.Monograph; import com.arsdigita.cms.contenttypes.Monograph;
import com.arsdigita.cms.contenttypes.Person; import com.arsdigita.cms.contenttypes.Person;
@ -42,21 +48,27 @@ import com.arsdigita.cms.contenttypes.WorkingPaper;
import com.arsdigita.domain.DataObjectNotFoundException; import com.arsdigita.domain.DataObjectNotFoundException;
import com.arsdigita.london.util.Transaction; import com.arsdigita.london.util.Transaction;
import com.arsdigita.packaging.Program; import com.arsdigita.packaging.Program;
import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection; import java.sql.Connection;
import java.sql.DriverManager; import java.sql.DriverManager;
import java.sql.ResultSet; import java.sql.ResultSet;
import java.sql.SQLException; import java.sql.SQLException;
import java.sql.Statement; import java.sql.Statement;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Enumeration;
import java.util.GregorianCalendar; import java.util.GregorianCalendar;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Properties; import java.util.Properties;
import java.util.StringTokenizer; import java.util.StringTokenizer;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLine;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
@ -83,6 +95,7 @@ public class DaBInImporter extends Program {
private Folder projects; private Folder projects;
private Folder publications; private Folder publications;
private Folder publishers; private Folder publishers;
private Folder files;
private Map<String, ContentBundle> departmentsMap; private Map<String, ContentBundle> departmentsMap;
private Map<String, ContentBundle> personsMap; private Map<String, ContentBundle> personsMap;
private Map<String, ContentBundle> projectsMap; private Map<String, ContentBundle> projectsMap;
@ -97,16 +110,6 @@ public class DaBInImporter extends Program {
public DaBInImporter() { public DaBInImporter() {
this(true); this(true);
/*super("DaBInImporter",
"0.1.0",
"MySQLHost MySQLUser MySQLPassword MySQLDB OrgaTitle OrgaName contentsection");
authorsAlpha = new HashMap<String, Folder>(12);
membersAlpha = new HashMap<String, Folder>(12);
authorsMap = new HashMap<String, SciAuthor>();
departmentsMap = new HashMap<String, SciDepartment>();
membersMap = new HashMap<String, SciMember>();
projectsMap = new HashMap<String, SciProject>();
publicationMap = new HashMap<String, Publication>();*/
} }
public DaBInImporter(boolean startup) { public DaBInImporter(boolean startup) {
@ -295,6 +298,8 @@ public class DaBInImporter extends Program {
publications = createFolder(root, "publikationen", "Publikationen"); publications = createFolder(root, "publikationen", "Publikationen");
files = createFolder(root, "dateien", "Dateien");
System.out.print("Creating organization item and " System.out.print("Creating organization item and "
+ "postal and office address items..."); + "postal and office address items...");
Transaction transaction = new Transaction() { Transaction transaction = new Transaction() {
@ -881,7 +886,7 @@ public class DaBInImporter extends Program {
result = stmt.executeQuery(String.format( result = stmt.executeQuery(String.format(
"SELECT Beteiligung, Person_Id " "SELECT Beteiligung, Person_Id "
+ "FROM arbeitspapierlink " + "FROM publikationlink "
+ "WHERE Publikation_Id = %s " + "WHERE Publikation_Id = %s "
+ "ORDER BY Reihenfolge", + "ORDER BY Reihenfolge",
data.getPublicationDaBInId())); data.getPublicationDaBInId()));
@ -908,6 +913,75 @@ public class DaBInImporter extends Program {
ex.printStackTrace(System.err); ex.printStackTrace(System.err);
} }
// Import every publication of type 'Sammelband' whose ErschienenIn column is
// NULL or empty, together with its authors.
System.out.println("Collected volumes...");
try {
    Statement stmt = connection.createStatement(
            ResultSet.TYPE_SCROLL_INSENSITIVE,
            ResultSet.CONCUR_UPDATABLE);
    // The author lookup gets its own Statement and ResultSet. Re-executing
    // `stmt` (and overwriting `result`) inside the loop closes the outer
    // result set, which ended the import after the first publication.
    Statement authorStmt = connection.createStatement();
    try {
        long counter = 1;
        long number;

        ResultSet result = stmt.executeQuery(
                "SELECT Publikation_Id, Name, Verlag, Jahr, Link, Beschreibung, Abteilung_Id, Sichtbarkeit "
                + "FROM publikation "
                + "WHERE (Typ = 'Sammelband' AND (ErschienenIn IS NULL OR CHAR_LENGTH(ErschienenIn) = 0)) "
                + "ORDER BY Name");
        // Jump to the last row once to learn the row count for the
        // progress output, then rewind.
        result.last();
        number = result.getRow();
        result.beforeFirst();

        while (result.next()) {
            System.out.printf("%4d of %4d: %s...\n", counter, number,
                              result.getString("Name"));
            PublicationData data = new PublicationData();
            data.setType(PublicationType.COLLECTED_VOLUME);
            data.setPublicationDaBInId(result.getString("Publikation_Id"));
            data.setName(result.getString("Name"));
            data.setVerlag(result.getString("Verlag"));
            data.setJahr(result.getString("Jahr"));
            data.setLink(result.getString("Link"));
            data.setBeschreibung(result.getString("Beschreibung"));
            data.setAbteilungId(result.getString("Abteilung_Id"));

            // Any value other than the two listed ones is treated as
            // globally visible.
            String visibility = result.getString("Sichtbarkeit");
            if ("Abteilung".equals(visibility)) {
                data.setVisiblity(PublicationVisibility.DEPARTMENT);
            } else if ("Persönlich".equals(visibility)) {
                data.setVisiblity(PublicationVisibility.PRIVATE);
            } else {
                data.setVisiblity(PublicationVisibility.GLOBAL);
            }

            ResultSet authors = authorStmt.executeQuery(String.format(
                    "SELECT Beteiligung, Person_Id "
                    + "FROM publikationlink "
                    + "WHERE Publikation_Id = %s "
                    + "ORDER BY Reihenfolge",
                    data.getPublicationDaBInId()));
            try {
                while (authors.next()) {
                    Authorship authorship = new Authorship();
                    authorship.setPersonDaBInId(authors.getString("Person_Id"));
                    authorship.setBeteiligung(authors.getString("Beteiligung"));
                    data.addAuthor(authorship);
                }
            } finally {
                authors.close();
            }

            createPublication(data);
            counter++;
        }
    } finally {
        // Closing a Statement also closes its current ResultSet.
        authorStmt.close();
        stmt.close();
    }
} catch (SQLException ex) {
    System.out.println("FAILED");
    ex.printStackTrace(System.err);
} catch (Exception ex) {
    System.out.println("FAILED");
    ex.printStackTrace(System.err);
}
System.out.println("Articles in collected volumes..."); System.out.println("Articles in collected volumes...");
try { try {
Statement stmt = connection.createStatement( Statement stmt = connection.createStatement(
@ -951,6 +1025,24 @@ public class DaBInImporter extends Program {
} else { } else {
data.setVisiblity(PublicationVisibility.GLOBAL); data.setVisiblity(PublicationVisibility.GLOBAL);
} }
extractPages(result.getString("Verlag"), data);
result = stmt.executeQuery(String.format(
"SELECT Beteiligung, Person_Id "
+ "FROM publikationlink "
+ "WHERE Publikation_Id = %s "
+ "ORDER BY Reihenfolge",
data.getPublicationDaBInId()));
while (result.next()) {
Authorship authorship;
authorship = new Authorship();
authorship.setPersonDaBInId(result.getString("Person_Id"));
authorship.setBeteiligung(result.getString("Beteiligung"));
data.addAuthor(authorship);
}
createPublication(data); createPublication(data);
counter++; counter++;
@ -1004,6 +1096,24 @@ public class DaBInImporter extends Program {
} else { } else {
data.setVisiblity(PublicationVisibility.GLOBAL); data.setVisiblity(PublicationVisibility.GLOBAL);
} }
extractPages(result.getString("Verlag"), data);
result = stmt.executeQuery(String.format(
"SELECT Beteiligung, Person_Id "
+ "FROM publikationlink "
+ "WHERE Publikation_Id = %s "
+ "ORDER BY Reihenfolge",
data.getPublicationDaBInId()));
while (result.next()) {
Authorship authorship;
authorship = new Authorship();
authorship.setPersonDaBInId(result.getString("Person_Id"));
authorship.setBeteiligung(result.getString("Beteiligung"));
data.addAuthor(authorship);
}
createPublication(data); createPublication(data);
counter++; counter++;
@ -1057,6 +1167,23 @@ public class DaBInImporter extends Program {
} else { } else {
data.setVisiblity(PublicationVisibility.GLOBAL); data.setVisiblity(PublicationVisibility.GLOBAL);
} }
result = stmt.executeQuery(String.format(
"SELECT Beteiligung, Person_Id "
+ "FROM publikationlink "
+ "WHERE Publikation_Id = %s "
+ "ORDER BY Reihenfolge",
data.getPublicationDaBInId()));
while (result.next()) {
Authorship authorship;
authorship = new Authorship();
authorship.setPersonDaBInId(result.getString("Person_Id"));
authorship.setBeteiligung(result.getString("Beteiligung"));
data.addAuthor(authorship);
}
createPublication(data); createPublication(data);
counter++; counter++;
@ -1089,7 +1216,7 @@ public class DaBInImporter extends Program {
WorkingPaperData data = new WorkingPaperData(); WorkingPaperData data = new WorkingPaperData();
result = stmt.executeQuery(String.format( result = stmt.executeQuery(String.format(
"SELECT Name, Jahr, Beschreibung " "SELECT Name, Jahr, Beschreibung, Datei "
+ "FROM arbeitspapier " + "FROM arbeitspapier "
+ "WHERE Arbeitspapier_Id = %s AND Sprache = 'DE'", + "WHERE Arbeitspapier_Id = %s AND Sprache = 'DE'",
workingPaperIds.get(i))); workingPaperIds.get(i)));
@ -1101,6 +1228,9 @@ public class DaBInImporter extends Program {
data.setTitleDe(result.getString("Name")); data.setTitleDe(result.getString("Name"));
data.setDescDe(result.getString("Beschreibung")); data.setDescDe(result.getString("Beschreibung"));
data.setYear(result.getString("Jahr")); data.setYear(result.getString("Jahr"));
if (result.getBlob("Datei") != null) {
data.setFile(result.getBlob("Datei").getBinaryStream());
}
} }
result = stmt.executeQuery(String.format( result = stmt.executeQuery(String.format(
@ -1835,7 +1965,7 @@ public class DaBInImporter extends Program {
ContentBundle publication; ContentBundle publication;
switch (publicationData.getType()) { switch (publicationData.getType()) {
case MONOGRAPH: case MONOGRAPH: {
Monograph monographDe = null; Monograph monographDe = null;
Monograph monographEn = null; Monograph monographEn = null;
PublisherData publisherData; PublisherData publisherData;
@ -1843,14 +1973,7 @@ public class DaBInImporter extends Program {
monographDe = new Monograph(); monographDe = new Monograph();
monographDe.setTitle(publicationData.getName()); monographDe.setTitle(publicationData.getName());
monographDe.setName(publicationData.getName()); monographDe.setName(publicationData.getName());
try { extractYearOfPublication(publicationData, monographDe);
monographDe.setYearOfPublication(Integer.parseInt(publicationData.
getJahr()));
} catch (NumberFormatException ex) {
System.out.println(
"***WARNING: Invalid year of publication: Not a number. Ignoring.");
}
publisherData = extractPublisher(publicationData. publisherData = extractPublisher(publicationData.
getVerlag()); getVerlag());
if (publishersMap.containsKey(publisherData)) { if (publishersMap.containsKey(publisherData)) {
@ -1886,14 +2009,7 @@ public class DaBInImporter extends Program {
monographEn = new Monograph(); monographEn = new Monograph();
monographEn.setTitle(publicationData.getName()); monographEn.setTitle(publicationData.getName());
monographEn.setName(publicationData.getName()); monographEn.setName(publicationData.getName());
try { extractYearOfPublication(publicationData, monographEn);
monographEn.setYearOfPublication(Integer.parseInt(publicationData.
getJahr()));
} catch (NumberFormatException ex) {
System.out.println(
"***WARNING: Invalid year of publication: Not a number. Ignoring.");
}
publisherData = extractPublisher(publicationData. publisherData = extractPublisher(publicationData.
getVerlag()); getVerlag());
if (publishersMap.containsKey(publisherData)) { if (publishersMap.containsKey(publisherData)) {
@ -1926,25 +2042,273 @@ public class DaBInImporter extends Program {
} }
monographEn.save(); monographEn.save();
publicationDe = monographDe;
publicationEn = monographEn;
break; break;
case COLLECTED_VOLUME: }
System.out.println("Not supported yet."); case COLLECTED_VOLUME: {
return; CollectedVolume collectedVolumeDe;
//break; CollectedVolume collectedVolumeEn;
case ARTICLE_IN_COLLECTED_VOLUME: PublisherData publisherData;
System.out.println("Not supported yet.");
return; collectedVolumeDe = new CollectedVolume();
//break; collectedVolumeDe.setTitle(publicationData.getName());
case ARTICLE_IN_JOURNAL: collectedVolumeDe.setName(publicationData.getName());
System.out.println("Not supported yet."); extractYearOfPublication(publicationData, collectedVolumeDe);
return; publisherData = extractPublisher(publicationData.
//break; getVerlag());
case GREY_LITERATURE: if (publishersMap.containsKey(publisherData)) {
System.out.println("Not supported yet."); collectedVolumeDe.setPublisher((Publisher) publishersMap.
return; get(publisherData).getPrimaryInstance());
//break; } else {
System.out.println(
"***WARNING: Invalid publisher. Ignoring.");
}
if ((publicationData.getLink() != null)
&& !publicationData.getLink().isEmpty()) {
RelatedLink link = new RelatedLink();
link.setTitle(publicationData.getLink());
link.setTargetType(Link.EXTERNAL_LINK);
link.setTargetURI(publicationData.getLink());
link.setLinkOwner(collectedVolumeDe);
}
collectedVolumeDe.save();
collectedVolumeEn = new CollectedVolume();
collectedVolumeEn.setTitle(publicationData.getName());
collectedVolumeEn.setName(publicationData.getName());
extractYearOfPublication(publicationData, collectedVolumeEn);
publisherData = extractPublisher(publicationData.
getVerlag());
if (publishersMap.containsKey(publisherData)) {
collectedVolumeEn.setPublisher((Publisher) publishersMap.
get(publisherData).getPrimaryInstance());
} else {
System.out.println(
"***WARNING: Invalid publisher. Ignoring.");
}
if ((publicationData.getLink() != null)
&& !publicationData.getLink().isEmpty()) {
RelatedLink link = new RelatedLink();
link.setTitle(publicationData.getLink());
link.setTargetType(Link.EXTERNAL_LINK);
link.setTargetURI(publicationData.getLink());
link.setLinkOwner(collectedVolumeEn);
}
collectedVolumeEn.save();
publicationDe = collectedVolumeDe;
publicationEn = collectedVolumeEn;
break;
}
case ARTICLE_IN_COLLECTED_VOLUME: {
ArticleInCollectedVolume articleDe;
ArticleInCollectedVolume articleEn;
articleDe = new ArticleInCollectedVolume();
articleDe.setTitle(publicationData.getName());
articleDe.setName(publicationData.getName());
extractYearOfPublication(publicationData, articleDe);
if ((publicationData.getLink() != null)
&& !publicationData.getLink().isEmpty()) {
RelatedLink link = new RelatedLink();
link.setTitle(publicationData.getLink());
link.setTargetType(Link.EXTERNAL_LINK);
link.setTargetURI(publicationData.getLink());
link.setLinkOwner(articleDe);
}
if (publicationData.getBeschreibung() != null) {
articleDe.setAbstract(publicationData.
getBeschreibung());
}
if ((publicationData.getErschienenIn() != null)
&& !publicationData.getErschienenIn().isEmpty()) {
articleDe.setMisc(publicationData.getErschienenIn());
}
if (publicationData.getPagesFrom() != 0) {
articleDe.setPagesFrom(
publicationData.getPagesFrom());
articleDe.setPagesTo(publicationData.getPagesTo());
}
articleDe.save();
articleEn = new ArticleInCollectedVolume();
articleEn.setTitle(publicationData.getName());
articleEn.setName(publicationData.getName());
extractYearOfPublication(publicationData, articleEn);
if ((publicationData.getLink() != null)
&& !publicationData.getLink().isEmpty()) {
RelatedLink link = new RelatedLink();
link.setTitle(publicationData.getLink());
link.setTargetType(Link.EXTERNAL_LINK);
link.setTargetURI(publicationData.getLink());
link.setLinkOwner(articleEn);
}
if (publicationData.getBeschreibung() != null) {
articleEn.setAbstract(publicationData.
getBeschreibung());
}
if ((publicationData.getErschienenIn() != null)
&& !publicationData.getErschienenIn().isEmpty()) {
articleEn.setMisc(publicationData.getErschienenIn());
}
if (publicationData.getPagesFrom() != 0) {
articleEn.setPagesFrom(
publicationData.getPagesFrom());
articleEn.setPagesTo(publicationData.getPagesTo());
}
articleEn.save();
publicationDe = articleDe;
publicationEn = articleEn;
break;
}
case ARTICLE_IN_JOURNAL: {
ArticleInJournal articleDe;
ArticleInJournal articleEn;
articleDe = new ArticleInJournal();
articleDe.setTitle(publicationData.getName());
articleDe.setName(publicationData.getName());
extractYearOfPublication(publicationData, articleDe);
if ((publicationData.getLink() != null)
&& !publicationData.getLink().isEmpty()) {
RelatedLink link = new RelatedLink();
link.setTitle(publicationData.getLink());
link.setTargetType(Link.EXTERNAL_LINK);
link.setTargetURI(publicationData.getLink());
link.setLinkOwner(articleDe);
}
if (publicationData.getBeschreibung() != null) {
articleDe.setAbstract(publicationData.
getBeschreibung());
}
if ((publicationData.getErschienenIn() != null)
&& !publicationData.getErschienenIn().isEmpty()) {
articleDe.setMisc(publicationData.getErschienenIn());
}
if (publicationData.getPagesFrom() != 0) {
articleDe.setPagesFrom(
publicationData.getPagesFrom());
articleDe.setPagesTo(publicationData.getPagesTo());
}
articleDe.save();
articleEn = new ArticleInJournal();
articleEn.setTitle(publicationData.getName());
articleEn.setName(publicationData.getName());
extractYearOfPublication(publicationData, articleEn);
if ((publicationData.getLink() != null)
&& !publicationData.getLink().isEmpty()) {
RelatedLink link = new RelatedLink();
link.setTitle(publicationData.getLink());
link.setTargetType(Link.EXTERNAL_LINK);
link.setTargetURI(publicationData.getLink());
link.setLinkOwner(articleEn);
}
if (publicationData.getBeschreibung() != null) {
articleEn.setAbstract(publicationData.
getBeschreibung());
}
if ((publicationData.getErschienenIn() != null)
&& !publicationData.getErschienenIn().isEmpty()) {
articleEn.setMisc(publicationData.getErschienenIn());
}
if (publicationData.getPagesFrom() != 0) {
articleEn.setPagesFrom(
publicationData.getPagesFrom());
articleEn.setPagesTo(publicationData.getPagesTo());
}
articleEn.save();
publicationDe = articleDe;
publicationEn = articleEn;
break;
}
case GREY_LITERATURE: {
GreyLiterature greyDe;
GreyLiterature greyEn;
greyDe = new GreyLiterature();
greyDe.setTitle(publicationData.getName());
greyDe.setName(publicationData.getName());
extractYearOfPublication(publicationData, greyDe);
if ((publicationData.getLink() != null)
&& !publicationData.getLink().isEmpty()) {
RelatedLink link = new RelatedLink();
link.setTitle(publicationData.getLink());
link.setTargetType(Link.EXTERNAL_LINK);
link.setTargetURI(publicationData.getLink());
link.setLinkOwner(greyDe);
}
if (publicationData.getBeschreibung() != null) {
greyDe.setAbstract(publicationData.getBeschreibung());
}
if ((publicationData.getErschienenIn() != null)
&& !publicationData.getErschienenIn().isEmpty()) {
greyDe.setMisc(publicationData.getErschienenIn());
}
if (publicationData.getPagesFrom() != 0) {
greyDe.setPagesFrom(
publicationData.getPagesFrom());
greyDe.setPagesTo(publicationData.getPagesTo());
}
greyDe.save();
greyEn = new GreyLiterature();
greyEn.setTitle(publicationData.getName());
greyEn.setName(publicationData.getName());
extractYearOfPublication(publicationData, greyEn);
if ((publicationData.getLink() != null)
&& !publicationData.getLink().isEmpty()) {
RelatedLink link = new RelatedLink();
link.setTitle(publicationData.getLink());
link.setTargetType(Link.EXTERNAL_LINK);
link.setTargetURI(publicationData.getLink());
link.setLinkOwner(greyEn);
}
if (publicationData.getBeschreibung() != null) {
greyEn.setAbstract(publicationData.getBeschreibung());
}
if ((publicationData.getErschienenIn() != null)
&& !publicationData.getErschienenIn().isEmpty()) {
greyEn.setMisc(publicationData.getErschienenIn());
}
if (publicationData.getPagesFrom() != 0) {
greyEn.setPagesFrom(
publicationData.getPagesFrom());
greyEn.setPagesTo(publicationData.getPagesTo());
}
greyEn.save();
publicationDe = greyDe;
publicationEn = greyEn;
break;
}
} }
publicationDe.setLanguage("de");
publicationEn.setLanguage("en");
System.out.println("\tAssigning authors...\n"); System.out.println("\tAssigning authors...\n");
int i = 1; int i = 1;
for (Authorship authorship : publicationData.getAuthors()) { for (Authorship authorship : publicationData.getAuthors()) {
@ -2045,6 +2409,7 @@ public class DaBInImporter extends Program {
} }
workingPaperDe.setOrganization(orgaDe); workingPaperDe.setOrganization(orgaDe);
workingPaperDe.setPlace("Bremen"); workingPaperDe.setPlace("Bremen");
extractYearOfPublication(workingPaperData, workingPaperDe);
workingPaperDe.setLanguage("de"); workingPaperDe.setLanguage("de");
workingPaperDe.setContentSection(section); workingPaperDe.setContentSection(section);
workingPaperDe.save(); workingPaperDe.save();
@ -2070,14 +2435,17 @@ public class DaBInImporter extends Program {
workingPaperNameEn.substring(0, 200); workingPaperNameEn.substring(0, 200);
} }
workingPaperEn.setName(workingPaperNameEn); workingPaperEn.setName(workingPaperNameEn);
if (workingPaperData.getDescEn().length() > 8000) { if (workingPaperData.getDescEn().length() > 4096) {
System.out.println(
"***Warning: Value of DaBIn field abstract too long for abstracts (max: 4096 characters). Truncating.");
workingPaperEn.setAbstract(workingPaperData.getDescEn(). workingPaperEn.setAbstract(workingPaperData.getDescEn().
substring(0, 8000)); substring(0, 4095));
} else { } else {
workingPaperEn.setAbstract(workingPaperData.getDescEn()); workingPaperEn.setAbstract(workingPaperData.getDescEn());
} }
workingPaperEn.setOrganization(orgaEn); workingPaperEn.setOrganization(orgaEn);
workingPaperEn.setPlace("Bremen"); workingPaperEn.setPlace("Bremen");
extractYearOfPublication(workingPaperData, workingPaperEn);
workingPaperEn.setLanguage("En"); workingPaperEn.setLanguage("En");
workingPaperEn.setContentSection(section); workingPaperEn.setContentSection(section);
workingPaperEn.save(); workingPaperEn.save();
@ -2098,7 +2466,80 @@ public class DaBInImporter extends Program {
publications.addItem(workingPaper); publications.addItem(workingPaper);
workingPaperMap.put(workingPaperData.getDabinId(), workingPaper); workingPaperMap.put(workingPaperData.getDabinId(), workingPaper);
System.out.println("OK"); System.out.println("\tOK");
// Copy the working paper's file from DaBIn into CCM: the stream is written to
// a temporary ZIP file, the first entry of that archive is unpacked into a
// temporary PDF, and a FileStorageItem is created and linked from both
// language versions of the working paper.
System.out.print("\tAssigning file...");
if (workingPaperData.getFile() == null) {
    System.out.println("No file found.");
} else {
    File tmpFile = null;
    ZipFile zipFile = null;
    try {
        byte[] buf = new byte[4096];
        int len;

        tmpFile = File.createTempFile("ccm_workingpaperCompressed", ".zip");
        FileOutputStream tmpFileStream = new FileOutputStream(tmpFile);
        try {
            InputStream blobStream = workingPaperData.getFile();
            while ((len = blobStream.read(buf)) != -1) {
                // Write only the bytes actually read. Writing the whole
                // buffer pads the file with stale data and corrupts the
                // archive.
                tmpFileStream.write(buf, 0, len);
            }
        } finally {
            // Must be closed before the file is opened as a ZIP archive.
            tmpFileStream.close();
        }

        zipFile = new ZipFile(tmpFile);
        Enumeration<? extends ZipEntry> entries = zipFile.entries();
        // Only the first entry of the archive is used.
        if (entries.hasMoreElements()) {
            File pdf = File.createTempFile("ccm_workingPaper", ".pdf");
            // Whether FileAsset.loadFromFile reads the file immediately is
            // not visible here, so the PDF is only removed at JVM exit.
            pdf.deleteOnExit();

            // `unzip` is closed together with zipFile in the outer finally.
            InputStream unzip = zipFile.getInputStream(entries.nextElement());
            FileOutputStream pdfFileStream = new FileOutputStream(pdf);
            try {
                while ((len = unzip.read(buf)) != -1) {
                    pdfFileStream.write(buf, 0, len);
                }
            } finally {
                pdfFileStream.close();
            }

            FileStorageItem fsi = new FileStorageItem();
            fsi.setTitle("Datei "
                         + ((WorkingPaper) workingPaper.getPrimaryInstance()).
                    getTitle());
            fsi.setName("datei_"
                        + ((WorkingPaper) workingPaper.getPrimaryInstance()).
                    getName());
            FileAsset file = new FileAsset();
            file.loadFromFile(workingPaper.getPrimaryInstance().getName(),
                              pdf,
                              "application/pdf");
            file.setContentSection(section);
            // NOTE(review): `file` is never attached to `fsi` and neither
            // object is saved in this block - confirm whether that is
            // intended or still missing.
            fsi.setContentSection(section);
            fsi.setLanguage("de");

            ContentBundle bundle = new ContentBundle(fsi);
            bundle.setContentSection(section);
            bundle.setDefaultLanguage("de");

            files.addItem(bundle);

            RelatedLink download = new RelatedLink();
            download.setTitle("download");
            download.setTargetType(Link.INTERNAL_LINK);
            download.setTargetItem(fsi);
            download.setLinkOwner(workingPaperDe);

            download = new RelatedLink();
            download.setTitle("download");
            download.setTargetType(Link.INTERNAL_LINK);
            download.setTargetItem(fsi);
            download.setLinkOwner(workingPaperEn);
        }
    } catch (IOException ex) {
        System.out.println(
                "***ERROR: Failed to copy file from DaBIn to CCM: ");
        ex.printStackTrace(System.out);
    } finally {
        if (zipFile != null) {
            try {
                // Also closes the entry stream obtained from the archive.
                zipFile.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing the temp archive fails.
            }
        }
        if (tmpFile != null) {
            tmpFile.delete();
        }
    }
}
System.out.print("\tAssigning authors to working paper...\n"); System.out.print("\tAssigning authors to working paper...\n");
int i = 1; int i = 1;
@ -2318,6 +2759,70 @@ public class DaBInImporter extends Program {
return publisher; return publisher;
} }
/**
 * Copies the year from the DaBIn publication record to the CCM publication.
 * Values longer than four characters are cut down to their first four
 * characters before parsing. A {@code null} year leaves the publication
 * untouched; a value that is not a number is reported and ignored.
 *
 * @param data        DaBIn record providing the year as a string
 * @param publication publication item receiving the parsed year
 */
private void extractYearOfPublication(final PublicationData data,
                                      final Publication publication) {
    final String jahr = data.getJahr();
    if (jahr == null) {
        return;
    }

    final String yearDigits = (jahr.length() > 4) ? jahr.substring(0, 4) : jahr;
    try {
        publication.setYearOfPublication(Integer.parseInt(yearDigits));
    } catch (NumberFormatException ex) {
        System.out.println(
                "***WARNING: Invalid year of publication: Not a number. Ignoring.");
    }
}
/**
 * Copies the year from the DaBIn working paper record to the CCM publication.
 * Values longer than four characters are cut down to their first four
 * characters before parsing. A {@code null} year leaves the publication
 * untouched; a value that is not a number is reported and ignored.
 *
 * @param data        DaBIn working paper record providing the year as a string
 * @param publication publication item receiving the parsed year
 */
private void extractYearOfPublication(final WorkingPaperData data,
                                      final Publication publication) {
    final String year = data.getYear();
    if (year == null) {
        return;
    }

    final String yearDigits = (year.length() > 4) ? year.substring(0, 4) : year;
    try {
        publication.setYearOfPublication(Integer.parseInt(yearDigits));
    } catch (NumberFormatException ex) {
        System.out.println(
                "***WARNING: Invalid year of publication: Not a number. Ignoring.");
    }
}
/**
 * Extracts a page range of the form {@code <from>-<to>} from a free-text
 * field and stores it in the publication data.
 *
 * The range is located via the last {@code '-'} in the string. The token
 * between the preceding space (or the start of the string) and the hyphen is
 * parsed as the first page; the token between the hyphen and the following
 * space (or the end of the string) as the last page. If either token is not
 * a number, or there is no hyphen, a message is printed and the publication
 * data is left unchanged. A {@code null} string is ignored silently.
 *
 * @param data            text that may contain a page range; may be
 *                        {@code null}
 * @param publicationData record receiving the parsed page numbers
 */
private void extractPages(final String data,
                          final PublicationData publicationData) {
    if (data == null) {
        // NULL column value: nothing to parse.
        return;
    }

    final int index = data.lastIndexOf('-');
    if (index < 0) {
        // Without this guard substring() below throws an unchecked
        // StringIndexOutOfBoundsException that aborts the caller's import.
        System.out.println("Malformed pages. Ignoring.");
        return;
    }

    final int leftLimit = data.lastIndexOf(' ', index);
    int rightLimit = data.indexOf(' ', index);
    if (rightLimit < 0) {
        // The page range is the last token of the string.
        rightLimit = data.length();
    }

    int pagesFrom;
    int pagesTo;
    try {
        pagesFrom = Integer.parseInt(data.substring(leftLimit + 1, index));
        pagesTo = Integer.parseInt(data.substring(index + 1, rightLimit));
    } catch (NumberFormatException ex) {
        System.out.println("Malformed pages. Ignoring.");
        return;
    }

    publicationData.setPagesFrom(pagesFrom);
    publicationData.setPagesTo(pagesTo);
}
public static void main(String[] args) { public static void main(String[] args) {
new DaBInImporter().run(args); new DaBInImporter().run(args);
} }

View File

@ -17,6 +17,8 @@ public class PublicationData {
private String beschreibung; private String beschreibung;
private String abteilungId; private String abteilungId;
private String erschienenIn; private String erschienenIn;
private int pagesFrom;
private int pagesTo;
private PublicationVisibility visiblity; private PublicationVisibility visiblity;
private PublicationType type; private PublicationType type;
private List<Authorship> authors = new ArrayList<Authorship>(); private List<Authorship> authors = new ArrayList<Authorship>();
@ -34,7 +36,13 @@ public class PublicationData {
} }
public void setBeschreibung(String beschreibung) { public void setBeschreibung(String beschreibung) {
this.beschreibung = beschreibung; if (beschreibung.length() < 4096) {
this.beschreibung = beschreibung;
} else {
System.out.println(
"***Warning: Value of DaBIn field 'Beschreibung' is too long for abstract (max: 4096 characters). Truncating.");
this.beschreibung = beschreibung.substring(0, 4095);
}
} }
public String getErschienenIn() { public String getErschienenIn() {
@ -112,4 +120,20 @@ public class PublicationData {
public void addAuthor(final Authorship author) { public void addAuthor(final Authorship author) {
authors.add(author); authors.add(author);
} }
/**
 * Returns the start of the publication's page range.
 *
 * @return the value passed to {@link #setPagesFrom(int)}, or 0 (the field
 *         default) if it was never set
 */
public int getPagesFrom() {
return pagesFrom;
}
/**
 * Sets the start of the publication's page range.
 *
 * @param pagesFrom the page number to store
 */
public void setPagesFrom(int pagesFrom) {
this.pagesFrom = pagesFrom;
}
/**
 * Returns the end of the publication's page range.
 *
 * @return the value passed to {@link #setPagesTo(int)}, or 0 (the field
 *         default) if it was never set
 */
public int getPagesTo() {
return pagesTo;
}
/**
 * Sets the end of the publication's page range.
 *
 * @param pagesTo the page number to store
 */
public void setPagesTo(int pagesTo) {
this.pagesTo = pagesTo;
}
} }

View File

@ -1,5 +1,6 @@
package com.arsdigita.cms.dabin; package com.arsdigita.cms.dabin;
import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -15,6 +16,7 @@ public class WorkingPaperData {
private String year; private String year;
private String descDe; private String descDe;
private String descEn; private String descEn;
private InputStream file;
private List<Authorship> authors; private List<Authorship> authors;
public WorkingPaperData() { public WorkingPaperData() {
@ -34,7 +36,13 @@ public class WorkingPaperData {
} }
public void setDescDe(String descDe) { public void setDescDe(String descDe) {
this.descDe = descDe; if (descDe.length() < 4096) {
this.descDe = descDe;
} else {
System.out.println(
"Value of DaBIn field is longer than maximum length for abstract (4096 characters). Truncating");
this.descDe = descDe.substring(0, 4096);
}
} }
public String getDescEn() { public String getDescEn() {
@ -42,7 +50,13 @@ public class WorkingPaperData {
} }
public void setDescEn(String descEn) { public void setDescEn(String descEn) {
this.descEn = descEn; if (descEn.length() < 4096) {
this.descEn = descEn;
} else {
System.out.println(
"Value of DaBIn field is longer than maximum length for abstract (4096 characters). Truncating");
this.descEn = descEn.substring(0, 4096);
}
} }
public String getTitleDe() { public String getTitleDe() {
@ -73,6 +87,14 @@ public class WorkingPaperData {
return authors; return authors;
} }
/**
 * Returns the stream holding the working paper's file.
 *
 * @return the stream passed to {@link #setFile(InputStream)}, or
 *         {@code null} if none was set
 */
public InputStream getFile() {
return file;
}
/**
 * Sets the stream holding the working paper's file. The stream is stored as
 * is; it is neither copied nor read here.
 *
 * @param file stream with the file content
 */
public void setFile(InputStream file) {
this.file = file;
}
public void setAuthors(List<Authorship> authors) { public void setAuthors(List<Authorship> authors) {
this.authors = authors; this.authors = authors;
} }