diff --git a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ImporterCli.java b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ImporterCli.java index 2ad1de229..b905bae52 100644 --- a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ImporterCli.java +++ b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ImporterCli.java @@ -23,8 +23,11 @@ import com.arsdigita.cms.scipublications.importer.report.ImportReport; import com.arsdigita.util.cmd.Program; import java.io.File; import java.io.IOException; +import java.io.OutputStream; import java.io.PrintWriter; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; @@ -43,7 +46,7 @@ public class ImporterCli extends Program { private static final String LIST = "list"; public ImporterCli() { - super("ImporterCli", "1.0.0", "ImporterCli [--pretend] [--publish] file | dir OR ImporterCLI --list"); + super("ImporterCli", "1.0.0", "ImporterCli [--pretend] [--publish] file | directory [parameters for importer] OR ImporterCLI --list"); final Options options = getOptions(); @@ -58,55 +61,78 @@ public class ImporterCli extends Program { options.addOption(OptionBuilder .withLongOpt(LIST) .withDescription("List all available importers and exit") - .create()); + .create()); } public static void main(final String args[]) { new ImporterCli().run(args); } + + @Override + public void help(final OutputStream stream) { + super.help(stream); + + final PrintWriter writer = new PrintWriter(stream); + writer.append("parameters for importer: Optional parameters for the importer, provided in the following format:"); + writer.append("parameter1=value1;parameter2=value2;..."); + } @Override protected void doRun(final CommandLine cmdLine) { try { - final PrintWriter writer = new PrintWriter(System.out); - final PrintWriter errWriter 
= new PrintWriter(System.err); + final PrintWriter writer = new PrintWriter(System.out); + final PrintWriter errWriter = new PrintWriter(System.err); - writer.printf("Publications Importer CLI tool.\n"); - writer.flush(); - if (cmdLine.hasOption(LIST)) { - final List formats = SciPublicationsImporters.getInstance().getSupportedFormats(); - writer.printf("Supported formats:\n"); - for (PublicationFormat format : formats) { - writer.printf("%s, MIME type: %s, file extension: %s\n", format.getName(), - format.getMimeType().toString(), - format.getFileExtension()); - } + writer.printf("Publications Importer CLI tool.\n"); writer.flush(); - return; - } + if (cmdLine.hasOption(LIST)) { + final List formats = SciPublicationsImporters.getInstance().getSupportedFormats(); + writer.printf("Supported formats:\n"); + for (PublicationFormat format : formats) { + writer.printf("%s, MIME type: %s, file extension: %s\n", format.getName(), + format.getMimeType().toString(), + format.getFileExtension()); + } + writer.flush(); + return; + } - final boolean pretend = cmdLine.hasOption(PRETEND); - final boolean publish = cmdLine.hasOption(PUBLISH); + final boolean pretend = cmdLine.hasOption(PRETEND); + final boolean publish = cmdLine.hasOption(PUBLISH); + + if (cmdLine.getArgs().length < 1) { + errWriter.printf("Missing file/directory to import.\n"); + errWriter.flush(); + help(System.err); + return; + } + + final String sourceName = cmdLine.getArgs()[0]; + final Map importerParams = new HashMap(); + if (cmdLine.getArgs().length >= 2) { + final String importerParamsStr = cmdLine.getArgs()[1]; + final String[] tokens = importerParamsStr.split(";"); + for(String token : tokens) { + final String[] valueTokens = token.split("="); + if (valueTokens.length == 2) { + importerParams.put(valueTokens[0], valueTokens[1]); + } + } + } + + final File source = new File(sourceName); + importFile(source, importerParams, pretend, publish); - if (cmdLine.getArgs().length != 1) { - 
errWriter.printf("Missing file/directory to import.\n"); errWriter.flush(); - help(System.err); - return; - } - - final String sourceName = cmdLine.getArgs()[0]; - final File source = new File(sourceName); - importFile(source, pretend, publish); - - errWriter.flush(); - writer.flush(); - } catch(Exception ex) { + writer.flush(); + } catch (Exception ex) { ex.printStackTrace(System.err); } } - protected void importFile(final File file, final boolean pretend, final boolean publish) { + protected void importFile(final File file, + final Map importerParams, + final boolean pretend, final boolean publish) { final PrintWriter writer = new PrintWriter(System.out); final PrintWriter errWriter = new PrintWriter(System.err); @@ -117,7 +143,7 @@ public class ImporterCli extends Program { if (file.isDirectory()) { final File[] files = file.listFiles(); for (File f : files) { - importFile(f, pretend, publish); + importFile(f, importerParams, pretend, publish); } } else if (file.isFile()) { final String fileName = file.getName(); @@ -145,7 +171,7 @@ public class ImporterCli extends Program { writer.flush(); final ImportReport report; try { - report = importer.importPublications(data, pretend, publish); + report = importer.importPublications(data, importerParams, pretend, publish); } catch (SciPublicationsImportException ex) { errWriter.printf("Import failed:\n"); diff --git a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/SciPublicationsImporter.java b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/SciPublicationsImporter.java index 0874c92eb..f73bfbda3 100644 --- a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/SciPublicationsImporter.java +++ b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/SciPublicationsImporter.java @@ -20,6 +20,7 @@ package com.arsdigita.cms.scipublications.importer; import com.arsdigita.cms.scipublications.imexporter.PublicationFormat; import 
com.arsdigita.cms.scipublications.importer.report.ImportReport; +import java.util.Map; /** * Interface for publication importers @@ -38,13 +39,17 @@ public interface SciPublicationsImporter { * Parses the provided string and creates publications from the string. * * @param publications The string containing the publications in the format supported by this importer + * @param importerParams Optional parameters for the importer * @param pretend If set to {@code true} no publications will be created. This can be used for debugging purposes * or to check a file containing publications. * @param publishNewItems If set to {@code true} the items created by the importer will also be published. * @return A report describing what the importer has done. * @throws SciPublicationsImportException If a non-recoverable error occurs */ - ImportReport importPublications(String publications, boolean pretend, boolean publishNewItems) + ImportReport importPublications(String publications, + Map importerParams, + boolean pretend, + boolean publishNewItems) throws SciPublicationsImportException; } diff --git a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/bibtex/BibTeXPublicationsImporter.java b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/bibtex/BibTeXPublicationsImporter.java index 66cbe3fb7..d21cdb1e6 100644 --- a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/bibtex/BibTeXPublicationsImporter.java +++ b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/bibtex/BibTeXPublicationsImporter.java @@ -4,6 +4,7 @@ import com.arsdigita.cms.scipublications.imexporter.PublicationFormat; import com.arsdigita.cms.scipublications.importer.SciPublicationsImportException; import com.arsdigita.cms.scipublications.importer.SciPublicationsImporter; import com.arsdigita.cms.scipublications.importer.report.ImportReport; +import java.util.Map; import javax.activation.MimeType; import 
javax.activation.MimeTypeParseException; import org.apache.log4j.Logger; @@ -28,6 +29,7 @@ public class BibTeXPublicationsImporter implements SciPublicationsImporter { } public ImportReport importPublications(final String publications, + final Map importerParams, final boolean pretend, final boolean publishNewItems) throws SciPublicationsImportException { diff --git a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/RisImporter.java b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/RisImporter.java index 481ac97da..82dd995bd 100644 --- a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/RisImporter.java +++ b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/RisImporter.java @@ -7,6 +7,7 @@ import com.arsdigita.cms.scipublications.importer.report.ImportReport; import com.arsdigita.cms.scipublications.importer.report.PublicationImportReport; import com.arsdigita.cms.scipublications.importer.util.ImporterUtil; import java.util.List; +import java.util.Map; import javax.activation.MimeType; import javax.activation.MimeTypeParseException; import org.apache.log4j.Logger; @@ -41,9 +42,16 @@ public class RisImporter implements SciPublicationsImporter { } public ImportReport importPublications(final String publications, + final Map importerParams, final boolean pretend, final boolean publishNewItems) throws SciPublicationsImportException { - final String[] lines = publications.split("\r\n"); + String lineBreak = "\r\n"; + if (importerParams.containsKey("linebreak")) { + lineBreak = importerParams.get("linebreak"); + LOGGER.warn("Using user provided linebreak sequence."); + } + + final String[] lines = publications.split(lineBreak); final RisParser parser = new RisParser(); final List datasets = parser.parse(lines); diff --git a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/RisParser.java 
b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/RisParser.java index 92b8dbe6d..3f149b962 100644 --- a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/RisParser.java +++ b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/RisParser.java @@ -13,6 +13,7 @@ import java.util.List; * @author Jens Pelzetter * @version $Id$ */ +@SuppressWarnings("PMD.CyclomaticComplexity") public class RisParser { public RisParser() { @@ -26,6 +27,11 @@ public class RisParser { RisFieldValue field; for (int i = 0; i < lines.length; i++) { + + if ((lines[i] == null) || lines[i].isEmpty()) { + continue; + } + field = parseRisLine(lines[i], i); if (RisField.TY.equals(field.getName())) { @@ -43,10 +49,10 @@ public class RisParser { } } else if (RisField.ER.equals(field.getName())) { openDataset = false; - } else if(field.getName() == null) { + } else if (field.getName() == null) { final RisDataset currentDataset = entries.get(entries.size() - 1); final List data = currentDataset.getValues().get(lastField); - data.set(data.size() - 1, data.get(data.size() - 1) + field.getValue()); + data.set(data.size() - 1, data.get(data.size() - 1) + field.getValue()); } else { final RisDataset currentDataset = entries.get(entries.size() - 1); if (currentDataset.getValues().get(field.getName()) == null) { @@ -70,7 +76,7 @@ public class RisParser { } private RisFieldValue parseRisLine(final String line, final int index) throws SciPublicationsImportException { - final String[] tokens = line.split(" - "); + final String[] tokens = skipBom(line).split(" - "); if (tokens.length == 2) { final RisField fieldName; @@ -89,4 +95,26 @@ public class RisParser { } } + /** + * Skip possible UTF-8 BOM + * + * @param str + * @return + */ + private String skipBom(final String str) { + if ((str == null) || str.isEmpty()) { + return null; + } + + final char firstChar = str.charAt(0); + + + // Hex value of BOM = EF BB BF => int 65279 + if (firstChar == 
65279) { + return str.substring(1); + } else { + return str; + } + } + } diff --git a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/converters/AbstractRisConverter.java b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/converters/AbstractRisConverter.java index 8222cea3a..29ed11d9e 100644 --- a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/converters/AbstractRisConverter.java +++ b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/converters/AbstractRisConverter.java @@ -40,12 +40,12 @@ public abstract class AbstractRisConverter { - + @Override protected Proceedings createPublication(final boolean pretend) { if (pretend) { @@ -65,7 +65,7 @@ public class ConfConverter extends AbstractRisConverter colVolEditors = dataset.getValues().get(cvEditorsField); final List colVolEditorData = new ArrayList(); - for (String colVolEditor : colVolEditors) { - final String[] tokens = colVolEditor.split(","); + if ((colVolEditors != null) && !colVolEditors.isEmpty()) { + for (String colVolEditor : colVolEditors) { + final String[] tokens = colVolEditor.split(","); - colVolEditorData.add(createAuthorData(tokens)); + colVolEditorData.add(createAuthorData(tokens)); + } } if ((colVolTitle != null) && !colVolTitle.isEmpty()) { diff --git a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/converters/utils/RisFieldUtil.java b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/converters/utils/RisFieldUtil.java index df73e8ee2..23b61d1c3 100644 --- a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/converters/utils/RisFieldUtil.java +++ b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/ris/converters/utils/RisFieldUtil.java @@ -43,13 +43,13 @@ public class RisFieldUtil { if (name.length() > 200) { name = name.substring(0, 200); } - publication.setName(name); + publication.setName(name); } 
report.setTitle(title); } - - private String normalizeString(final String str) { + + private String normalizeString(final String str) { if (str == null) { return "null"; } @@ -61,7 +61,6 @@ public class RisFieldUtil { replaceAll("[^a-zA-Z0-9\\-]", "").toLowerCase().trim(); } - public void processField(final RisDataset dataset, final RisField field, final Publication publication, @@ -70,9 +69,17 @@ public class RisFieldUtil { final List values = dataset.getValues().get(field); if ((values != null) && !values.isEmpty()) { if (!pretend) { - publication.set(targetField, values.get(0)); + if ((values.get(0) != null) && values.get(0).length() < 4096) { + publication.set(targetField, values.get(0)); + } else { + publication.set(targetField, values.get(0).substring(0, 4096)); + } + } + if ((values.get(0) != null) && values.get(0).length() < 4096) { + report.addField(new FieldImportReport(targetField, values.get(0))); + } else { + report.addField(new FieldImportReport(targetField, values.get(0).substring(0, 4096))); } - report.addField(new FieldImportReport(targetField, values.get(0))); } } @@ -147,11 +154,16 @@ public class RisFieldUtil { } } + @SuppressWarnings("PMD.CyclomaticComplexity") public void processPages(final RisDataset dataset, final RisField field, final Publication publication, final PublicationImportReport report) { final List values = dataset.getValues().get(field); + if ((values == null) || values.isEmpty()) { + return; + } + final String pages = values.get(0); final String[] tokens = pages.split("-"); if (tokens.length == 2) { @@ -185,4 +197,25 @@ public class RisFieldUtil { } } + public void processIsbn(final RisDataset dataset, + final RisField field, + final Publication publication, + final PublicationImportReport report) { + final List values = dataset.getValues().get(field); + if ((values == null) || values.isEmpty()) { + return; + } + + String isbn = values.get(0); + isbn = isbn.replace("-", ""); + if (isbn.length() > 17) { + isbn = 
isbn.substring(0, 17); + } + + if (!pretend) { + publication.set("isbn", isbn); + } + report.addField(new FieldImportReport("ISBN", isbn)); + } + } diff --git a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/util/ImporterUtil.java b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/util/ImporterUtil.java index 57d53136a..4c2eb104d 100644 --- a/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/util/ImporterUtil.java +++ b/ccm-sci-publications/src/com/arsdigita/cms/scipublications/importer/util/ImporterUtil.java @@ -4,6 +4,7 @@ import com.arsdigita.cms.ContentItem; import com.arsdigita.cms.Folder; import com.arsdigita.cms.contenttypes.ArticleInCollectedVolume; import com.arsdigita.cms.contenttypes.ArticleInJournal; +import com.arsdigita.cms.contenttypes.AuthorshipCollection; import com.arsdigita.cms.contenttypes.CollectedVolume; import com.arsdigita.cms.contenttypes.CollectedVolumeBundle; import com.arsdigita.cms.contenttypes.GenericOrganizationalUnit; @@ -258,9 +259,9 @@ public class ImporterUtil { if ((publisherName != null) && !publisherName.isEmpty()) { report.setPublisher(processPublisher(collectedVolume, place, publisherName, pretend)); } - + if ((edition != null) && !edition.isEmpty()) { - collectedVolume.setEdition(edition); + collectedVolume.setEdition(edition); } collectedVolume.save(); @@ -578,51 +579,50 @@ public class ImporterUtil { return report; } - public SeriesImportReport processSeries(final Publication publication, - final String seriesTitle, - final boolean pretend) { + final String seriesTitle, + final boolean pretend) { final SeriesImportReport report = new SeriesImportReport(); - + final Session session = SessionManager.getSession(); final DataCollection collection = session.retrieve(Series.BASE_DATA_OBJECT_TYPE); collection.addEqualsFilter("title", seriesTitle); - + report.setSeriesTitle(seriesTitle); if (collection.isEmpty()) { - if (!pretend) { - final Integer folderId = 
Publication.getConfig().getDefaultSeriesFolder(); - final Folder folder = new Folder(new BigDecimal(folderId)); - if (folder == null) { - throw new IllegalArgumentException("Error getting folder for series."); - } - - final Series series = new Series(); - series.setName(normalizeString(seriesTitle)); - series.setTitle(seriesTitle); - series.setLanguage(Kernel.getConfig().getLanguagesIndependentCode()); - series.setContentSection(folder.getContentSection()); - series.save(); - - final SeriesBundle bundle = new SeriesBundle(series); - bundle.setParent(folder); - bundle.setContentSection(folder.getContentSection()); - bundle.save(); - - publication.addSeries(series); - - if (publish) { - publishItem(series); - } - } - report.setCreated(true); - - //Special handling for pretend mode - if (pretend && createdSeries.contains(seriesTitle)) { - report.setCreated(false); - } else { - createdSeries.add(seriesTitle); - } + if (!pretend) { + final Integer folderId = Publication.getConfig().getDefaultSeriesFolder(); + final Folder folder = new Folder(new BigDecimal(folderId)); + if (folder == null) { + throw new IllegalArgumentException("Error getting folder for series."); + } + + final Series series = new Series(); + series.setName(normalizeString(seriesTitle)); + series.setTitle(seriesTitle); + series.setLanguage(Kernel.getConfig().getLanguagesIndependentCode()); + series.setContentSection(folder.getContentSection()); + series.save(); + + final SeriesBundle bundle = new SeriesBundle(series); + bundle.setParent(folder); + bundle.setContentSection(folder.getContentSection()); + bundle.save(); + + publication.addSeries(series); + + if (publish) { + publishItem(series); + } + } + report.setCreated(true); + + //Special handling for pretend mode + if (pretend && createdSeries.contains(seriesTitle)) { + report.setCreated(false); + } else { + createdSeries.add(seriesTitle); + } } else { if (!pretend) { collection.next(); @@ -631,9 +631,9 @@ public class ImporterUtil { } 
report.setCreated(false); } - + collection.close(); - + return report; }