SciPublications: Importer for RIS format finished (hopefully)

git-svn-id: https://svn.libreccm.org/ccm/trunk@2039 8810af33-2d31-482b-a856-94f89814c4df
master
jensp 2013-01-14 14:08:02 +00:00
parent e9d6663fa8
commit 00bde55770
14 changed files with 202 additions and 99 deletions

View File

@ -23,8 +23,11 @@ import com.arsdigita.cms.scipublications.importer.report.ImportReport;
import com.arsdigita.util.cmd.Program;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
@ -43,7 +46,7 @@ public class ImporterCli extends Program {
private static final String LIST = "list";
public ImporterCli() {
super("ImporterCli", "1.0.0", "ImporterCli [--pretend] [--publish] file | dir OR ImporterCLI --list");
super("ImporterCli", "1.0.0", "ImporterCli [--pretend] [--publish] file | directory [parameters for importer] OR ImporterCLI --list");
final Options options = getOptions();
@ -65,6 +68,15 @@ public class ImporterCli extends Program {
new ImporterCli().run(args);
}
/**
 * Prints the standard help text of the program followed by a description
 * of the optional importer parameters.
 *
 * @param stream The stream to write the help text to. The stream is
 *               flushed but not closed, because the caller owns it
 *               (for example {@code System.err}).
 */
@Override
public void help(final OutputStream stream) {
    super.help(stream);

    final PrintWriter writer = new PrintWriter(stream);
    // println instead of append: each sentence gets its own line instead of
    // running into the next one.
    writer.println("parameters for importer: Optional parameters for the importer, provided in the following format:");
    writer.println("parameter1=value1;parameter2=value2;...");
    // A PrintWriter created from an OutputStream does not flush automatically;
    // without this call the text above may never reach the stream.
    writer.flush();
}
@Override
protected void doRun(final CommandLine cmdLine) {
try {
@ -88,7 +100,7 @@ public class ImporterCli extends Program {
final boolean pretend = cmdLine.hasOption(PRETEND);
final boolean publish = cmdLine.hasOption(PUBLISH);
if (cmdLine.getArgs().length != 1) {
if (cmdLine.getArgs().length < 1) {
errWriter.printf("Missing file/directory to import.\n");
errWriter.flush();
help(System.err);
@ -96,8 +108,20 @@ public class ImporterCli extends Program {
}
final String sourceName = cmdLine.getArgs()[0];
final Map<String, String> importerParams = new HashMap<String, String>();
if (cmdLine.getArgs().length >= 2) {
final String importerParamsStr = cmdLine.getArgs()[1];
final String[] tokens = importerParamsStr.split(";");
for(String token : tokens) {
final String[] valueTokens = token.split("=");
if (valueTokens.length == 2) {
importerParams.put(valueTokens[0], valueTokens[1]);
}
}
}
final File source = new File(sourceName);
importFile(source, pretend, publish);
importFile(source, importerParams, pretend, publish);
errWriter.flush();
writer.flush();
@ -106,7 +130,9 @@ public class ImporterCli extends Program {
}
}
protected void importFile(final File file, final boolean pretend, final boolean publish) {
protected void importFile(final File file,
final Map<String, String> importerParams,
final boolean pretend, final boolean publish) {
final PrintWriter writer = new PrintWriter(System.out);
final PrintWriter errWriter = new PrintWriter(System.err);
@ -117,7 +143,7 @@ public class ImporterCli extends Program {
if (file.isDirectory()) {
final File[] files = file.listFiles();
for (File f : files) {
importFile(f, pretend, publish);
importFile(f, importerParams, pretend, publish);
}
} else if (file.isFile()) {
final String fileName = file.getName();
@ -145,7 +171,7 @@ public class ImporterCli extends Program {
writer.flush();
final ImportReport report;
try {
report = importer.importPublications(data, pretend, publish);
report = importer.importPublications(data, importerParams, pretend, publish);
} catch (SciPublicationsImportException ex) {
errWriter.printf("Import failed:\n");

View File

@ -20,6 +20,7 @@ package com.arsdigita.cms.scipublications.importer;
import com.arsdigita.cms.scipublications.imexporter.PublicationFormat;
import com.arsdigita.cms.scipublications.importer.report.ImportReport;
import java.util.Map;
/**
* Interface for publication importers
@ -38,13 +39,17 @@ public interface SciPublicationsImporter {
* Parses the provided string and creates publications from the string.
*
* @param publications The string containing the publications in the format supported by this importer
* @param importerParams Optional parameters for the importer
* @param pretend If set to {@code true} no publications will be created. This can be used for debugging purposes
* or to check a file containing publications.
* @param publishNewItems If set to {@code true} the items created by the importer will also be published.
* @return A report describing what the importer has done.
* @throws SciPublicationsImportException If a non-recoverable error occurs
*/
ImportReport importPublications(String publications, boolean pretend, boolean publishNewItems)
ImportReport importPublications(String publications,
Map<String, String> importerParams,
boolean pretend,
boolean publishNewItems)
throws SciPublicationsImportException;
}

View File

@ -4,6 +4,7 @@ import com.arsdigita.cms.scipublications.imexporter.PublicationFormat;
import com.arsdigita.cms.scipublications.importer.SciPublicationsImportException;
import com.arsdigita.cms.scipublications.importer.SciPublicationsImporter;
import com.arsdigita.cms.scipublications.importer.report.ImportReport;
import java.util.Map;
import javax.activation.MimeType;
import javax.activation.MimeTypeParseException;
import org.apache.log4j.Logger;
@ -28,6 +29,7 @@ public class BibTeXPublicationsImporter implements SciPublicationsImporter {
}
public ImportReport importPublications(final String publications,
final Map<String, String> importerParams,
final boolean pretend,
final boolean publishNewItems)
throws SciPublicationsImportException {

View File

@ -7,6 +7,7 @@ import com.arsdigita.cms.scipublications.importer.report.ImportReport;
import com.arsdigita.cms.scipublications.importer.report.PublicationImportReport;
import com.arsdigita.cms.scipublications.importer.util.ImporterUtil;
import java.util.List;
import java.util.Map;
import javax.activation.MimeType;
import javax.activation.MimeTypeParseException;
import org.apache.log4j.Logger;
@ -41,9 +42,16 @@ public class RisImporter implements SciPublicationsImporter {
}
public ImportReport importPublications(final String publications,
final Map<String, String> importerParams,
final boolean pretend,
final boolean publishNewItems) throws SciPublicationsImportException {
final String[] lines = publications.split("\r\n");
String lineBreak = "\r\n";
if (importerParams.containsKey("linebreak")) {
lineBreak = importerParams.get("linebreak");
LOGGER.warn("Using user provided linebreak sequence.");
}
final String[] lines = publications.split(lineBreak);
final RisParser parser = new RisParser();
final List<RisDataset> datasets = parser.parse(lines);

View File

@ -13,6 +13,7 @@ import java.util.List;
* @author Jens Pelzetter <jens@jp-digital.de>
* @version $Id$
*/
@SuppressWarnings("PMD.CyclomaticComplexity")
public class RisParser {
public RisParser() {
@ -26,6 +27,11 @@ public class RisParser {
RisFieldValue field;
for (int i = 0; i < lines.length; i++) {
if ((lines[i] == null) || lines[i].isEmpty()) {
continue;
}
field = parseRisLine(lines[i], i);
if (RisField.TY.equals(field.getName())) {
@ -70,7 +76,7 @@ public class RisParser {
}
private RisFieldValue parseRisLine(final String line, final int index) throws SciPublicationsImportException {
final String[] tokens = line.split(" - ");
final String[] tokens = skipBom(line).split(" - ");
if (tokens.length == 2) {
final RisField fieldName;
@ -89,4 +95,26 @@ public class RisParser {
}
}
/**
 * Removes a leading byte order mark (BOM) from a line, if there is one.
 *
 * A file saved as "UTF-8 with BOM" starts with the bytes EF BB BF, which
 * decode to the single character U+FEFF. That character is not part of the
 * RIS data and has to be dropped before the line is tokenized.
 *
 * @param str The line to clean up, may be {@code null}.
 * @return The line without a leading BOM. An empty string is returned for
 *         {@code null} or empty input, so callers can safely invoke string
 *         methods on the result.
 */
private String skipBom(final String str) {
    if ((str == null) || str.isEmpty()) {
        // Never hand null to the caller: the result is tokenized right away.
        return "";
    }

    // U+FEFF (decimal 65279) is the decoded form of the UTF-8 BOM bytes.
    if (str.charAt(0) == '\uFEFF') {
        return str.substring(1);
    }
    return str;
}
}

View File

@ -45,7 +45,7 @@ public abstract class AbstractRisConverter<T extends Publication, B extends Publ
publication.setLanguage(Kernel.getConfig().getLanguagesIndependentCode());
}
final B bundle = createBundle(publication, pretend);
importReport.setTitle(publication.BASE_DATA_OBJECT_TYPE);
importReport.setType(publication.BASE_DATA_OBJECT_TYPE);
processFields(dataset, publication, importerUtil, importReport, pretend);

View File

@ -62,7 +62,7 @@ public class BookConverter extends AbstractRisConverter<Monograph, PublicationBu
fieldUtil.processField(dataset, RisField.AB, publication, "abstract", importReport);
fieldUtil.processField(dataset, RisField.ET, publication, "edition", importReport);
fieldUtil.processIntField(dataset, RisField.NV, publication, "numberOfVolumes", importReport);
fieldUtil.processField(dataset, RisField.SN, publication, "isbn", importReport);
fieldUtil.processIsbn(dataset, RisField.SN, publication, importReport);
fieldUtil.processIntField(dataset, RisField.SP, publication, "numberOfPages", importReport);
fieldUtil.processIntField(dataset, RisField.VL, publication, "volume", importReport);

View File

@ -65,7 +65,7 @@ public class ConfConverter extends AbstractRisConverter<Proceedings, Proceedings
fieldUtil.processDateField(dataset, RisField.DA, publication, "dateFromOfConference", importReport);
fieldUtil.processField(dataset, RisField.ET, publication, "edition", importReport);
fieldUtil.processIntField(dataset, RisField.NV, publication, "numberOfVolumes", importReport);
fieldUtil.processField(dataset, RisField.SN, publication, "isbn", importReport);
fieldUtil.processIsbn(dataset, RisField.SN, publication, importReport);
fieldUtil.processIntField(dataset, RisField.SP, publication, "numberOfPages", importReport);
fieldUtil.processField(dataset, RisField.T2, publication, "nameOfConference", importReport);
fieldUtil.processIntField(dataset, RisField.VL, publication, "volume", importReport);

View File

@ -56,7 +56,7 @@ public class EbookConverter extends AbstractRisConverter<Monograph, PublicationW
fieldUtil.processField(dataset, RisField.ET, publication, "edition", importReport);
fieldUtil.processField(dataset, RisField.SN, publication, "isbn", importReport);
fieldUtil.processIsbn(dataset, RisField.SN, publication, importReport);
fieldUtil.processIntField(dataset, RisField.SP, publication, "numberOfPages", importReport);

View File

@ -56,7 +56,7 @@ public class EdbookConverter extends AbstractRisConverter<CollectedVolume, Colle
fieldUtil.processField(dataset, RisField.AB, publication, "abstract", importReport);
fieldUtil.processField(dataset, RisField.ET, publication, "edition", importReport);
fieldUtil.processIntField(dataset, RisField.NV, publication, "numberOfVolumes", importReport);
fieldUtil.processField(dataset, RisField.SN, publication, "isbn", importReport);
fieldUtil.processIsbn(dataset, RisField.SN, publication, importReport);
fieldUtil.processIntField(dataset, RisField.SP, publication, "numberOfPages", importReport);
fieldUtil.processIntField(dataset, RisField.VL, publication, "volume", importReport);
}

View File

@ -10,7 +10,6 @@ import com.arsdigita.cms.scipublications.importer.ris.converters.utils.RisAuthor
import com.arsdigita.cms.scipublications.importer.ris.converters.utils.RisFieldUtil;
import com.arsdigita.cms.scipublications.importer.ris.converters.utils.RisJournalUtil;
import com.arsdigita.cms.scipublications.importer.util.ImporterUtil;
import com.arsdigita.kernel.Kernel;
/**
*
@ -60,7 +59,7 @@ public class JourConverter extends AbstractRisConverter<ArticleInJournal, Articl
fieldUtil.processPages(dataset, RisField.SP, publication, importReport);
fieldUtil.processField(dataset, RisField.VL, publication, "volume", importReport);
fieldUtil.processIntField(dataset, RisField.VL, publication, "volume", importReport);
fieldUtil.processIntField(dataset, RisField.M2, publication, "pagesFrom", importReport);
}

View File

@ -45,11 +45,13 @@ public class RisColVolUtil {
final List<String> colVolEditors = dataset.getValues().get(cvEditorsField);
final List<AuthorData> colVolEditorData = new ArrayList<AuthorData>();
if ((colVolEditors != null) && !colVolEditors.isEmpty()) {
for (String colVolEditor : colVolEditors) {
final String[] tokens = colVolEditor.split(",");
colVolEditorData.add(createAuthorData(tokens));
}
}
if ((colVolTitle != null) && !colVolTitle.isEmpty()) {
final CollectedVolumeImportReport colVolReport = importerUtil.processCollectedVolume(article,

View File

@ -61,7 +61,6 @@ public class RisFieldUtil {
replaceAll("[^a-zA-Z0-9\\-]", "").toLowerCase().trim();
}
public void processField(final RisDataset dataset,
final RisField field,
final Publication publication,
@ -70,9 +69,17 @@ public class RisFieldUtil {
final List<String> values = dataset.getValues().get(field);
if ((values != null) && !values.isEmpty()) {
if (!pretend) {
if ((values.get(0) != null) && values.get(0).length() < 4096) {
publication.set(targetField, values.get(0));
} else {
publication.set(targetField, values.get(0).substring(0, 4096));
}
}
if ((values.get(0) != null) && values.get(0).length() < 4096) {
report.addField(new FieldImportReport(targetField, values.get(0)));
} else {
report.addField(new FieldImportReport(targetField, values.get(0).substring(0, 4096)));
}
}
}
@ -147,11 +154,16 @@ public class RisFieldUtil {
}
}
@SuppressWarnings("PMD.CyclomaticComplexity")
public void processPages(final RisDataset dataset,
final RisField field,
final Publication publication,
final PublicationImportReport report) {
final List<String> values = dataset.getValues().get(field);
if ((values == null) || values.isEmpty()) {
return;
}
final String pages = values.get(0);
final String[] tokens = pages.split("-");
if (tokens.length == 2) {
@ -185,4 +197,25 @@ public class RisFieldUtil {
}
}
/**
 * Copies the ISBN found in a RIS field to the {@code isbn} property of a
 * publication and records the value in the import report.
 *
 * Hyphens are removed from the value, and the result is cut to at most
 * 17 characters (presumably the size limit of the target property;
 * verify against the data model). Nothing happens if the dataset has no
 * value for the field or if the first value is {@code null}.
 *
 * @param dataset     The RIS dataset to read from.
 * @param field       The RIS field holding the ISBN.
 * @param publication The publication to update. It is only modified when
 *                    the {@code pretend} flag of this utility is not set.
 * @param report      The report to which the processed value is added.
 */
public void processIsbn(final RisDataset dataset,
                        final RisField field,
                        final Publication publication,
                        final PublicationImportReport report) {
    final List<String> values = dataset.getValues().get(field);
    // The null check on the first value avoids a NullPointerException below.
    if ((values == null) || values.isEmpty() || (values.get(0) == null)) {
        return;
    }

    String isbn = values.get(0).replace("-", "");
    if (isbn.length() > 17) {
        isbn = isbn.substring(0, 17);
    }

    if (!pretend) {
        publication.set("isbn", isbn);
    }
    // The report entry is written in pretend mode as well.
    report.addField(new FieldImportReport("ISBN", isbn));
}
}

View File

@ -4,6 +4,7 @@ import com.arsdigita.cms.ContentItem;
import com.arsdigita.cms.Folder;
import com.arsdigita.cms.contenttypes.ArticleInCollectedVolume;
import com.arsdigita.cms.contenttypes.ArticleInJournal;
import com.arsdigita.cms.contenttypes.AuthorshipCollection;
import com.arsdigita.cms.contenttypes.CollectedVolume;
import com.arsdigita.cms.contenttypes.CollectedVolumeBundle;
import com.arsdigita.cms.contenttypes.GenericOrganizationalUnit;
@ -578,7 +579,6 @@ public class ImporterUtil {
return report;
}
public SeriesImportReport processSeries(final Publication publication,
final String seriesTitle,
final boolean pretend) {