Importer für das RIS-Format und Converter für die RIS-Typen INPR, GEN, ABST, BOOK, JFULL, EBOOK, JOUR und EJOUR

git-svn-id: https://svn.libreccm.org/ccm/trunk@2029 8810af33-2d31-482b-a856-94f89814c4df
master
jensp 2013-01-07 17:35:15 +00:00
parent fdc4191c25
commit fa7e3ca2b5
12 changed files with 281 additions and 88 deletions

View File

@ -347,6 +347,8 @@ public enum RisField {
C1,
C2,
C3,
C4,
C5,
/**
* Number of volumes
*/

View File

@ -16,15 +16,21 @@ public class RisDataset {
private final RisType type;
private final Map<RisField, List<String>> values = new EnumMap<RisField, List<String>>(RisField.class);
private final int firstLine;
public RisDataset(final RisType type) {
/**
 * Creates a dataset for the given RIS type.
 *
 * @param type      the RIS type of the dataset
 * @param firstLine the line number stored as the start of this dataset; it is exposed via
 *                  {@code getFirstLine()}
 */
public RisDataset(final RisType type, final int firstLine) {
    // The two assignments are independent of each other.
    this.firstLine = firstLine;
    this.type = type;
}
/**
 * @return the RIS type this dataset was created with
 */
public RisType getType() {
    return this.type;
}
/**
 * @return the line number passed to the constructor as the first line of this dataset
 */
public int getFirstLine() {
    return this.firstLine;
}
/**
 * Provides read-only access to the values of this dataset.
 *
 * @return an unmodifiable view of the map from RIS field to the list of values stored for it
 */
public Map<RisField, List<String>> getValues() {
    final Map<RisField, List<String>> readOnlyView = Collections.unmodifiableMap(values);
    return readOnlyView;
}

View File

@ -34,7 +34,7 @@ public class RisParser {
+ "Aborting import.", i + 1));
} else {
try {
entries.add(startDataset(field.getValue()));
entries.add(startDataset(field.getValue(), i + 1));
openDataset = true;
} catch (IllegalArgumentException ex) {
throw new SciPublicationsImportException(String.format("Invalid type at line %d.", i + 1), ex);
@ -58,9 +58,9 @@ public class RisParser {
return Collections.unmodifiableList(entries);
}
private RisDataset startDataset(final String type) {
private RisDataset startDataset(final String type, final int firstLine) {
final RisType risType = RisType.valueOf(type);
return new RisDataset(risType);
return new RisDataset(risType, firstLine);
}
private RisFieldValue parseRisLine(final String line, final int index) throws SciPublicationsImportException {

View File

@ -0,0 +1,17 @@
package com.arsdigita.cms.scipublications.importer.ris.converters;
import com.arsdigita.cms.scipublications.imexporter.ris.RisType;
/**
 * Converter registered for the RIS type {@code ABST}.
 *
 * The conversion itself is inherited unchanged from {@link JourConverter}; this class only
 * reports a different RIS type.
 *
 * @author Jens Pelzetter <jens@jp-digital.de>
 * @version $Id$
 */
public class AbstConverter extends JourConverter {

    /**
     * @return always {@link RisType#ABST}
     */
    @Override
    public RisType getRisType() {
        final RisType handledType = RisType.ABST;
        return handledType;
    }

}

View File

@ -1,5 +1,6 @@
package com.arsdigita.cms.scipublications.importer.ris.converters;
import com.arsdigita.cms.contenttypes.ArticleInJournal;
import com.arsdigita.cms.contenttypes.Publication;
import com.arsdigita.cms.contenttypes.PublicationWithPublisher;
import com.arsdigita.cms.scipublications.imexporter.ris.RisField;
@ -19,6 +20,49 @@ import java.util.List;
*/
public abstract class AbstractRisConverter implements RisConverter {
/**
 * Copies the first value of a RIS field to a property of the publication and records the value in
 * the import report. Does nothing if the dataset contains no value for the field.
 *
 * @param dataset     the dataset to read the value from
 * @param field       the RIS field to look up
 * @param publication the publication whose property is set
 * @param targetField name of the publication property; also used as the field name in the report
 * @param report      the report to which the imported field is added
 * @param pretend     not evaluated by this method
 */
protected void processField(final RisDataset dataset,
                            final RisField field,
                            final Publication publication,
                            final String targetField,
                            final PublicationImportReport report,
                            final boolean pretend) {
    final List<String> fieldValues = dataset.getValues().get(field);
    if ((fieldValues == null) || fieldValues.isEmpty()) {
        return;
    }

    // Only the first value is used; any further values of the field are ignored.
    final String firstValue = fieldValues.get(0);
    publication.set(targetField, firstValue);
    report.addField(new FieldImportReport(targetField, firstValue));
}
/**
 * Parses the first value of a RIS field as an integer, stores it in a property of the publication
 * and records the value in the import report. Does nothing if the dataset contains no value for
 * the field. If the value is not a valid integer, the property is left untouched and a message
 * naming the field and the first line of the dataset is added to the report.
 *
 * @param dataset     the dataset to read the value from
 * @param field       the RIS field to look up
 * @param publication the publication whose property is set
 * @param targetField name of the publication property; also used as the field name in the report
 * @param report      the report receiving either the imported field or the failure message
 * @param pretend     not evaluated by this method
 */
protected void processIntField(final RisDataset dataset,
                               final RisField field,
                               final Publication publication,
                               final String targetField,
                               final PublicationImportReport report,
                               final boolean pretend) {
    final List<String> values = dataset.getValues().get(field);
    if ((values == null) || values.isEmpty()) {
        return;
    }

    final String valueStr = values.get(0);
    try {
        final int value = Integer.parseInt(valueStr);
        // Store the parsed integer only. Previously the raw string was written to the same
        // property right afterwards, overwriting the integer, and the field never reached the report.
        publication.set(targetField, value);
        report.addField(new FieldImportReport(targetField, valueStr));
    } catch (NumberFormatException ex) {
        report.addMessage(String.format("Failed to parse value of field '%s' into an integer for dataset "
                                        + "starting on line %d.",
                                        field,
                                        dataset.getFirstLine()));
    }
}
/**
 * Sets the title of the publication and of the import report to the first value of the RIS field
 * {@code TI}.
 *
 * The dataset must contain a {@code TI} value; the lookup is not guarded, so a dataset without a
 * title makes this method fail with a {@link NullPointerException}.
 *
 * @param dataset     the dataset to read the title from
 * @param publication the publication whose title is set
 * @param report      the report whose title is set
 * @param pretend     not evaluated by this method
 */
protected void processTitle(final RisDataset dataset,
                            final Publication publication,
                            final PublicationImportReport report,
                            final boolean pretend) {
    final String title = dataset.getValues().get(RisField.TI).get(0);
    publication.setTitle(title);
    // The report must carry the title as well; previously the publication title was set twice
    // and the report was never touched.
    report.setTitle(title);
}
protected void processAuthors(final RisDataset dataset,
final RisField risField,
final ImporterUtil importerUtil,
@ -47,7 +91,13 @@ public abstract class AbstractRisConverter implements RisConverter {
final List<String> authors = dataset.getValues().get(risField);
if ((authors != null) && !authors.isEmpty()) {
for (String authorStr : authors) {
processAuthorStr(authorStr, isEditors, importerUtil, publication, report, pretend);
processAuthorStr(authorStr,
isEditors,
importerUtil,
publication,
report,
dataset.getFirstLine(),
pretend);
}
}
}
@ -57,12 +107,14 @@ public abstract class AbstractRisConverter implements RisConverter {
final ImporterUtil importerUtil,
final Publication publication,
final PublicationImportReport importReport,
final int firstLine,
final boolean pretend) {
final AuthorData authorData = new AuthorData();
final String[] tokens = authorStr.split(",");
if (tokens.length == 0) {
importReport.addMessage(String.format("Failed to parse author string '%s'.", authorStr));
importReport.addMessage(String.format("Failed to parse author string '%s' at dataset starting at line %d.",
authorStr, firstLine));
return;
}
@ -117,7 +169,8 @@ public abstract class AbstractRisConverter implements RisConverter {
publication.setNumberOfPages(value);
report.addField(new FieldImportReport("number of pages", numberOfPages.get(0)));
} catch (NumberFormatException ex) {
report.addMessage("Failed to parse number of pages");
report.addMessage(String.format("Failed to parse number of pages at dataset starting at line %d",
dataset.getFirstLine()));
}
}
@ -134,7 +187,8 @@ public abstract class AbstractRisConverter implements RisConverter {
publication.setNumberOfVolumes(value);
report.addField(new FieldImportReport("number of volumes", numberOfVols.get(0)));
} catch (NumberFormatException ex) {
report.addMessage("Failed to parse number of volumes.");
report.addMessage(String.format("Failed to parse number of volumes at dataset starting at line %d.",
dataset.getFirstLine()));
}
}
}
@ -150,7 +204,8 @@ public abstract class AbstractRisConverter implements RisConverter {
publication.setVolume(value);
report.addField(new FieldImportReport("volume", volume.get(0)));
} catch (NumberFormatException ex) {
report.addMessage("Failed to parse value of field 'volume'.");
report.addMessage(String.format("Failed to parse value of field 'volume' on dataset starting "
+ "at line %d.", dataset.getFirstLine()));
}
}
}
@ -166,9 +221,77 @@ public abstract class AbstractRisConverter implements RisConverter {
report.addField(new FieldImportReport("year", yearStr));
} catch (NumberFormatException ex) {
report.addMessage(String.format("Failed to convert year of publication value '%s' from RIS to"
+ "integer value. Setting year of publication to 0"));
+ "integer value on dataset starting at line %d. Setting year of "
+ "publication to 0", dataset.getFirstLine()));
publication.setYearOfPublication(0);
}
}
/**
 * Hands the first value of a RIS field to {@code ImporterUtil.processSeries} as the series of the
 * publication and stores the returned result in the report. Does nothing if the dataset contains
 * no value for the field.
 *
 * @param dataset      the dataset to read the series name from
 * @param field        the RIS field holding the series name
 * @param publication  the publication passed on to the importer utility
 * @param importerUtil the utility that processes the series
 * @param pretend      passed through to the importer utility
 * @param report       the report receiving the result of the series processing
 */
protected void processSeries(final RisDataset dataset,
                             final RisField field,
                             final Publication publication,
                             final ImporterUtil importerUtil,
                             final boolean pretend,
                             final PublicationImportReport report) {
    final List<String> seriesValues = dataset.getValues().get(field);
    if ((seriesValues == null) || seriesValues.isEmpty()) {
        return;
    }

    final String seriesName = seriesValues.get(0);
    report.setSeries(importerUtil.processSeries(publication, seriesName, pretend));
}
/**
 * Hands the first value of a RIS field to {@code ImporterUtil.processJournal} as the journal of
 * the article and stores the returned result in the report. Does nothing if the dataset contains
 * no value for the field.
 *
 * @param dataset      the dataset to read the journal name from
 * @param field        the RIS field holding the journal name
 * @param article      the article passed on to the importer utility
 * @param importerUtil the utility that processes the journal
 * @param pretend      passed through to the importer utility
 * @param report       the report receiving the result of the journal processing
 */
protected void processJournal(final RisDataset dataset,
                              final RisField field,
                              final ArticleInJournal article,
                              final ImporterUtil importerUtil,
                              final boolean pretend,
                              final PublicationImportReport report) {
    final List<String> journalValues = dataset.getValues().get(field);
    if ((journalValues == null) || journalValues.isEmpty()) {
        return;
    }

    final String journalName = journalValues.get(0);
    report.setJournal(importerUtil.processJournal(article, journalName, pretend));
}
/**
 * Reads a page specification from a RIS field and stores it in the {@code pagesFrom} and
 * {@code pagesTo} properties of the publication.
 *
 * The first value of the field is split at {@code "-"}:
 * <ul>
 * <li>two parts: both are parsed as integers and stored as {@code pagesFrom} and {@code pagesTo}</li>
 * <li>one part: it is parsed as an integer and stored as {@code pagesFrom}</li>
 * <li>more than two parts: a message is added to the report and nothing is stored</li>
 * </ul>
 * Parts that are not valid integers produce a message in the report instead of a value. A value
 * that yields no parts at all (for example a lone {@code "-"}) is ignored silently. Does nothing
 * if the dataset contains no value for the field.
 *
 * @param dataset     the dataset to read the page specification from
 * @param field       the RIS field holding the page specification
 * @param publication the publication whose page properties are set
 * @param pretend     not evaluated by this method
 * @param report      the report receiving the imported fields or the failure messages
 */
protected void processPages(final RisDataset dataset,
                            final RisField field,
                            final Publication publication,
                            final boolean pretend,
                            final PublicationImportReport report) {
    final List<String> values = dataset.getValues().get(field);
    // The field is optional: without this guard a dataset lacking it caused a
    // NullPointerException, unlike the other field handlers of this class.
    if ((values == null) || values.isEmpty()) {
        return;
    }

    final String pages = values.get(0);
    final String[] tokens = pages.split("-");
    if (tokens.length == 2) {
        try {
            // Parse both numbers before storing anything so that a broken end page
            // does not leave a lone start page behind.
            final int pagesFrom = Integer.parseInt(tokens[0]);
            final int pagesTo = Integer.parseInt(tokens[1]);
            publication.set("pagesFrom", pagesFrom);
            publication.set("pagesTo", pagesTo);
            report.addField(new FieldImportReport("pagesFrom", Integer.toString(pagesFrom)));
            report.addField(new FieldImportReport("pagesTo", Integer.toString(pagesTo)));
        } catch (NumberFormatException ex) {
            report.addMessage(String.format("Failed to parse pages value in dataset starting at line %d. "
                                            + "On of the values given is not an integer.",
                                            dataset.getFirstLine()));
        }
    } else if (tokens.length == 1) {
        try {
            final int pagesFrom = Integer.parseInt(tokens[0]);
            publication.set("pagesFrom", pagesFrom);
            report.addField(new FieldImportReport("pagesFrom", Integer.toString(pagesFrom)));
        } catch (NumberFormatException ex) {
            report.addMessage(String.format("Failed to parse pages value in dataset starting at line %d. "
                                            + "Value is not an integer.",
                                            dataset.getFirstLine()));
        }
    } else if (tokens.length > 2) {
        report.addMessage(String.format("Failed to parse pages value in dataset starting at line %d. "
                                        + "Invalid format",
                                        dataset.getFirstLine()));
    }
}
}

View File

@ -26,8 +26,7 @@ public class BookConverter extends AbstractRisConverter {
final Monograph monograph = new Monograph();
monograph.setTitle(dataset.getValues().get(RisField.TI).get(0));
report.setTitle(dataset.getValues().get(RisField.TI).get(0));
processTitle(dataset, monograph, report, pretend);
processYear(dataset, pretend, monograph, report);
@ -37,34 +36,19 @@ public class BookConverter extends AbstractRisConverter {
processPublisher(dataset, pretend, monograph, importerUtil, report);
final List<String> abstractList = dataset.getValues().get(RisField.AB);
if ((abstractList != null) && (!abstractList.isEmpty())) {
monograph.setAbstract(abstractList.get(0));
report.addField(new FieldImportReport("abstract", abstractList.get(0)));
}
processField(dataset, RisField.AB, monograph, "abstract", report, pretend);
processField(dataset, RisField.ET, monograph, "edition", report, pretend);
final List<String> edition = dataset.getValues().get(RisField.ET);
if ((edition != null) && !edition.isEmpty()) {
monograph.setEdition(edition.get(0));
report.addField(new FieldImportReport("edition", edition.get(0)));
}
processNumberOfVolumes(dataset, pretend, monograph, report);
final List<String> isbn = dataset.getValues().get(RisField.SN);
if ((isbn != null) && !isbn.isEmpty()) {
monograph.setISBN(isbn.get(0));
report.addField(new FieldImportReport("isbn", isbn.get(0)));
}
processField(dataset, RisField.SN, monograph, "isbn", report, pretend);
processNumberOfPages(dataset, pretend, monograph, report);
processVolume(dataset, pretend, monograph, report);
final List<String> series = dataset.getValues().get(RisField.T2);
if ((series != null) && !series.isEmpty()) {
report.setSeries(importerUtil.processSeries(monograph, series.get(0), pretend));
}
processSeries(dataset, RisField.T2, monograph, importerUtil, pretend, report);
return report;
}

View File

@ -3,11 +3,9 @@ package com.arsdigita.cms.scipublications.importer.ris.converters;
import com.arsdigita.cms.contenttypes.Monograph;
import com.arsdigita.cms.scipublications.imexporter.ris.RisField;
import com.arsdigita.cms.scipublications.imexporter.ris.RisType;
import com.arsdigita.cms.scipublications.importer.report.FieldImportReport;
import com.arsdigita.cms.scipublications.importer.report.PublicationImportReport;
import com.arsdigita.cms.scipublications.importer.ris.RisDataset;
import com.arsdigita.cms.scipublications.importer.util.ImporterUtil;
import java.util.List;
/**
* Converter for the RIS type {@code EBOOK} to the SciPublications type {@link Monograph}.
@ -26,9 +24,8 @@ public class EbookConverter extends AbstractRisConverter {
final Monograph monograph = new Monograph();
monograph.setTitle(dataset.getValues().get(RisField.TI).get(0));
report.setTitle(dataset.getValues().get(RisField.TI).get(0));
processTitle(dataset, monograph, report, pretend);
processYear(dataset, pretend, monograph, report);
processAuthors(dataset, RisField.AU, importerUtil, monograph, report, pretend);
@ -37,32 +34,17 @@ public class EbookConverter extends AbstractRisConverter {
processPublisher(dataset, pretend, monograph, importerUtil, report);
final List<String> abstractList = dataset.getValues().get(RisField.AB);
if ((abstractList != null) && (!abstractList.isEmpty())) {
monograph.setAbstract(abstractList.get(0));
report.addField(new FieldImportReport("abstract", abstractList.get(0)));
}
processField(dataset, RisField.AB, monograph, "abstract", report, pretend);
processField(dataset, RisField.ET, monograph, "edition", report, pretend);
final List<String> edition = dataset.getValues().get(RisField.ET);
if ((edition != null) && !edition.isEmpty()) {
monograph.setEdition(edition.get(0));
report.addField(new FieldImportReport("edition", edition.get(0)));
}
final List<String> isbn = dataset.getValues().get(RisField.SN);
if ((isbn != null) && !isbn.isEmpty()) {
monograph.setISBN(isbn.get(0));
report.addField(new FieldImportReport("isbn", isbn.get(0)));
}
processField(dataset, RisField.SN, monograph, "isbn", report, pretend);
processNumberOfPages(dataset, pretend, monograph, report);
processVolume(dataset, pretend, monograph, report);
final List<String> series = dataset.getValues().get(RisField.T2);
if ((series != null) && !series.isEmpty()) {
report.setSeries(importerUtil.processSeries(monograph, series.get(0), pretend));
}
processSeries(dataset, RisField.T2, monograph, importerUtil, pretend, report);
return report;
}

View File

@ -1,6 +1,7 @@
package com.arsdigita.cms.scipublications.importer.ris.converters;
import com.arsdigita.cms.contenttypes.ArticleInJournal;
import com.arsdigita.cms.scipublications.imexporter.ris.RisField;
import com.arsdigita.cms.scipublications.imexporter.ris.RisType;
import com.arsdigita.cms.scipublications.importer.report.PublicationImportReport;
import com.arsdigita.cms.scipublications.importer.ris.RisConverter;
@ -13,7 +14,7 @@ import com.arsdigita.cms.scipublications.importer.util.ImporterUtil;
* @author Jens Pelzetter <jens@jp-digital.de>
* @version $Id$
*/
public class EjourConverter implements RisConverter {
public class EjourConverter extends AbstractRisConverter implements RisConverter {
public PublicationImportReport convert(final RisDataset dataset,
final ImporterUtil importerUtil,
@ -24,7 +25,21 @@ public class EjourConverter implements RisConverter {
final ArticleInJournal article = new ArticleInJournal();
processTitle(dataset, article, report, pretend);
processYear(dataset, pretend, article, report);
processAuthors(dataset, RisField.AU, importerUtil, article, report, pretend);
processField(dataset, RisField.AB, article, "abstract", report, pretend);
processJournal(dataset, RisField.T2, article, importerUtil, pretend, report);
processField(dataset, RisField.M1, article, "issue", report, pretend);
processPages(dataset, RisField.SP, article, pretend, report);
processField(dataset, RisField.VL, article, "volume", report, pretend);
return report;
}

View File

@ -4,12 +4,9 @@ import com.arsdigita.cms.contenttypes.GreyLiterature;
import com.arsdigita.cms.contenttypes.Monograph;
import com.arsdigita.cms.scipublications.imexporter.ris.RisField;
import com.arsdigita.cms.scipublications.imexporter.ris.RisType;
import com.arsdigita.cms.scipublications.importer.report.FieldImportReport;
import com.arsdigita.cms.scipublications.importer.report.PublicationImportReport;
import com.arsdigita.cms.scipublications.importer.ris.RisConverter;
import com.arsdigita.cms.scipublications.importer.ris.RisDataset;
import com.arsdigita.cms.scipublications.importer.util.ImporterUtil;
import java.util.List;
/**
* Converter for the RIS type {@code GEN} to the SciPublications {@link GreyLiterature}
@ -28,19 +25,9 @@ public class GenConverter extends AbstractRisConverter {
final Monograph publication = new Monograph();
publication.setTitle(dataset.getValues().get(RisField.TI).get(0));
report.setTitle(dataset.getValues().get(RisField.TI).get(0));
processTitle(dataset, publication, report, pretend);
final String yearStr = dataset.getValues().get(RisField.PY).get(0);
try {
final int year = Integer.parseInt(yearStr);
publication.setYearOfPublication(year);
report.addField(new FieldImportReport("year", yearStr));
} catch (NumberFormatException ex) {
report.addMessage(String.format("Failed to convert year of publication value '%s' from RIS to"
+ "integer value. Setting year of publication to 0"));
publication.setYearOfPublication(0);
}
processYear(dataset, pretend, publication, report);
processAuthors(dataset, RisField.AU, importerUtil, publication, report, pretend);
processAuthors(dataset, RisField.A2, importerUtil, publication, report, pretend);
@ -49,26 +36,16 @@ public class GenConverter extends AbstractRisConverter {
processPublisher(dataset, pretend, publication, importerUtil, report);
final List<String> abstractList = dataset.getValues().get(RisField.AB);
if ((abstractList != null) && (!abstractList.isEmpty())) {
publication.setAbstract(abstractList.get(0));
report.addField(new FieldImportReport("abstract", abstractList.get(0)));
}
processField(dataset, RisField.AB, publication, "abstract", report, pretend);
final List<String> edition = dataset.getValues().get(RisField.ET);
if ((edition != null) && !edition.isEmpty()) {
publication.setEdition(edition.get(0));
report.addField(new FieldImportReport("edition", edition.get(0)));
}
processField(dataset, RisField.ET, publication, "edition", report, pretend);
final List<String> isbn = dataset.getValues().get(RisField.SN);
if ((isbn != null) && !isbn.isEmpty()) {
publication.setISBN(isbn.get(0));
report.addField(new FieldImportReport("isbn", isbn.get(0)));
}
processField(dataset, RisField.SN, publication, "isbn", report, pretend);
processNumberOfPages(dataset, pretend, publication, report);
processNumberOfVolumes(dataset, pretend, publication, report);
processVolume(dataset, pretend, publication, report);
return report;

View File

@ -0,0 +1,17 @@
package com.arsdigita.cms.scipublications.importer.ris.converters;
import com.arsdigita.cms.scipublications.imexporter.ris.RisType;
/**
 * Converter registered for the RIS type {@code INPR}.
 *
 * The conversion itself is inherited unchanged from {@link JourConverter}; this class only
 * reports a different RIS type.
 *
 * @author Jens Pelzetter <jens@jp-digital.de>
 * @version $Id$
 */
public class InprConverter extends JourConverter {

    /**
     * @return always {@link RisType#INPR}
     */
    @Override
    public RisType getRisType() {
        final RisType handledType = RisType.INPR;
        return handledType;
    }

}

View File

@ -0,0 +1,17 @@
package com.arsdigita.cms.scipublications.importer.ris.converters;
import com.arsdigita.cms.scipublications.imexporter.ris.RisType;
/**
 * Converter registered for the RIS type {@code JFULL}.
 *
 * The conversion itself is inherited unchanged from {@link JourConverter}; this class only
 * reports a different RIS type.
 *
 * @author Jens Pelzetter <jens@jp-digital.de>
 * @version $Id$
 */
public class JfullConverter extends JourConverter {

    /**
     * @return always {@link RisType#JFULL}
     */
    @Override
    public RisType getRisType() {
        final RisType handledType = RisType.JFULL;
        return handledType;
    }

}

View File

@ -0,0 +1,53 @@
package com.arsdigita.cms.scipublications.importer.ris.converters;
import com.arsdigita.cms.contenttypes.ArticleInJournal;
import com.arsdigita.cms.scipublications.imexporter.ris.RisField;
import com.arsdigita.cms.scipublications.imexporter.ris.RisType;
import com.arsdigita.cms.scipublications.importer.report.PublicationImportReport;
import com.arsdigita.cms.scipublications.importer.ris.RisDataset;
import com.arsdigita.cms.scipublications.importer.util.ImporterUtil;
/**
 * Converter for the RIS type {@code JOUR}. Fills an {@link ArticleInJournal} from a RIS dataset
 * and reports what was imported.
 *
 * @author Jens Pelzetter <jens@jp-digital.de>
 * @version $Id$
 */
public class JourConverter extends AbstractRisConverter {

    /**
     * Creates a new {@link ArticleInJournal} and fills it from the dataset.
     *
     * @param dataset         the RIS dataset to convert
     * @param importerUtil    utility used for authors and the journal
     * @param pretend         passed through to the individual field handlers
     * @param publishNewItems not evaluated by this method
     * @return the report describing the imported fields and any problems encountered
     */
    @Override
    public PublicationImportReport convert(final RisDataset dataset,
                                           final ImporterUtil importerUtil,
                                           final boolean pretend,
                                           final boolean publishNewItems) {
        final PublicationImportReport importReport = new PublicationImportReport();
        importReport.setType(ArticleInJournal.BASE_DATA_OBJECT_TYPE);

        final ArticleInJournal journalArticle = new ArticleInJournal();

        // Basic bibliographic data.
        processTitle(dataset, journalArticle, importReport, pretend);
        processYear(dataset, pretend, journalArticle, importReport);
        processAuthors(dataset, RisField.AU, importerUtil, journalArticle, importReport, pretend);
        processField(dataset, RisField.AB, journalArticle, "abstract", importReport, pretend);

        // Journal-specific data. The order is kept because M2 is written to "pagesFrom" after
        // SP has been processed.
        processJournal(dataset, RisField.T2, journalArticle, importerUtil, pretend, importReport);
        processField(dataset, RisField.M1, journalArticle, "issue", importReport, pretend);
        processPages(dataset, RisField.SP, journalArticle, pretend, importReport);
        processField(dataset, RisField.VL, journalArticle, "volume", importReport, pretend);
        processIntField(dataset, RisField.M2, journalArticle, "pagesFrom", importReport, pretend);

        return importReport;
    }

    /**
     * @return always {@link RisType#JOUR}
     */
    @Override
    public RisType getRisType() {
        final RisType handledType = RisType.JOUR;
        return handledType;
    }

}