diff --git a/ccm-ldn-terms/pdl/com/arsdigita/london/terms/indexing/Indexer.pdl b/ccm-ldn-terms/pdl/com/arsdigita/london/terms/indexing/Indexer.pdl index acf9b3451..4a1e164da 100644 --- a/ccm-ldn-terms/pdl/com/arsdigita/london/terms/indexing/Indexer.pdl +++ b/ccm-ldn-terms/pdl/com/arsdigita/london/terms/indexing/Indexer.pdl @@ -1,34 +1,34 @@ -// Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. -// -// This library is free software; you can redistribute it and/or modify it under -// the terms of the GNU Lesser General Public License as published by the Free -// Software Foundation; either version 2.1 of the License, or (at your option) -// any later version. -// -// This library is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more -// details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with this library; if not, write to the Free Software Foundation, Inc., -// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -model com.arsdigita.london.terms.indexing; - -import com.arsdigita.kernel.ACSObject; -import com.arsdigita.kernel.Party; -import com.arsdigita.london.terms.Domain; - -object type Indexer extends ACSObject { - Blob[1..1] filter = trm_domains_indexer.filter BLOB; - Date[1..1] lastModifiedDate = trm_domains_indexer.last_modified_date TIMESTAMP; - Party[1..1] lastModifiedUser = join trm_domains_indexer.last_modified_user to parties.party_id; - reference key (trm_domains_indexer.indexer_id); -} - -association { - component Indexer[0..1] indexer = join trm_domains.key to trm_domains_indexer.key; - Domain[1..1] domain = join trm_domains_indexer.key to trm_domains.key; -} - +// Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 2.1 of the License, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +model com.arsdigita.london.terms.indexing; + +import com.arsdigita.kernel.ACSObject; +import com.arsdigita.kernel.Party; +import com.arsdigita.london.terms.Domain; + +object type Indexer extends ACSObject { + Blob[1..1] filter = trm_domains_indexer.filter BLOB; + Date[1..1] lastModifiedDate = trm_domains_indexer.last_modified_date TIMESTAMP; + Party[1..1] lastModifiedUser = join trm_domains_indexer.last_modified_user to parties.party_id; + reference key (trm_domains_indexer.indexer_id); +} + +association { + component Indexer[0..1] indexer = join trm_domains.key to trm_domains_indexer.key; + Domain[1..1] domain = join trm_domains_indexer.key to trm_domains.key; +} + diff --git a/ccm-ldn-terms/sql/ccm-ldn-terms/oracle-se/upgrade/6.5.0-6.5.1/table-trm_domains_indexer-deferred.sql b/ccm-ldn-terms/sql/ccm-ldn-terms/oracle-se/upgrade/6.5.0-6.5.1/table-trm_domains_indexer-deferred.sql index 8dc2e8aa0..4fd77ea6a 100644 --- a/ccm-ldn-terms/sql/ccm-ldn-terms/oracle-se/upgrade/6.5.0-6.5.1/table-trm_domains_indexer-deferred.sql +++ b/ccm-ldn-terms/sql/ccm-ldn-terms/oracle-se/upgrade/6.5.0-6.5.1/table-trm_domains_indexer-deferred.sql @@ -1,6 +1,6 @@ -alter table trm_domains_indexer add - constraint trm_dom_ind_las_mod_us_f_1k1i3 foreign key (last_modified_user) - references parties(party_id); -alter table trm_domains_indexer add - constraint trm_domain_indexer_key_f_lghsq foreign key (key) - references trm_domains(key); +alter table trm_domains_indexer add + constraint trm_dom_ind_las_mod_us_f_1k1i3 foreign key (last_modified_user) + references parties(party_id); +alter table trm_domains_indexer add + constraint trm_domain_indexer_key_f_lghsq foreign key (key) + references trm_domains(key); diff --git a/ccm-ldn-terms/sql/ccm-ldn-terms/oracle-se/upgrade/6.5.1-6.5.2/change-unique_id-to-varchar.sql b/ccm-ldn-terms/sql/ccm-ldn-terms/oracle-se/upgrade/6.5.1-6.5.2/change-unique_id-to-varchar.sql index 165eb6a5a..8a0222244 100644 --- a/ccm-ldn-terms/sql/ccm-ldn-terms/oracle-se/upgrade/6.5.1-6.5.2/change-unique_id-to-varchar.sql +++ b/ccm-ldn-terms/sql/ccm-ldn-terms/oracle-se/upgrade/6.5.1-6.5.2/change-unique_id-to-varchar.sql @@ -1,7 +1,7 @@ -alter table trm_terms add (unique_id_string varchar(128) default 'UNKNOWN' not null ); -alter table trm_terms rename column unique_id to unique_id_old; -alter table trm_terms rename column unique_id_string to unique_id; -update trm_terms set unique_id = unique_id_old; -alter table trm_terms drop constraint trm_ter_domai_uniqu_id_u_6sito; -alter table trm_terms add constraint trm_ter_domai_uniqu_id_u_6sito unique(domain, unique_id); -alter table trm_terms drop column unique_id_old; +alter table trm_terms add (unique_id_string varchar(128) default 'UNKNOWN' not null ); +alter table trm_terms rename column unique_id to unique_id_old; +alter table trm_terms rename column unique_id_string to unique_id; +update trm_terms set unique_id = unique_id_old; +alter table trm_terms drop constraint trm_ter_domai_uniqu_id_u_6sito; +alter table trm_terms add constraint trm_ter_domai_uniqu_id_u_6sito unique(domain, unique_id); +alter table trm_terms drop column unique_id_old; diff --git a/ccm-ldn-terms/sql/ccm-ldn-terms/postgres/upgrade/6.5.0-6.5.1/table-trm_domains_indexer-deferred.sql b/ccm-ldn-terms/sql/ccm-ldn-terms/postgres/upgrade/6.5.0-6.5.1/table-trm_domains_indexer-deferred.sql index 8dc2e8aa0..4fd77ea6a 100644 --- a/ccm-ldn-terms/sql/ccm-ldn-terms/postgres/upgrade/6.5.0-6.5.1/table-trm_domains_indexer-deferred.sql +++ b/ccm-ldn-terms/sql/ccm-ldn-terms/postgres/upgrade/6.5.0-6.5.1/table-trm_domains_indexer-deferred.sql @@ -1,6 +1,6 @@ -alter table trm_domains_indexer add - constraint trm_dom_ind_las_mod_us_f_1k1i3 foreign key (last_modified_user) - references parties(party_id); -alter table trm_domains_indexer add - constraint trm_domain_indexer_key_f_lghsq foreign key (key) - references trm_domains(key); +alter table trm_domains_indexer add + constraint trm_dom_ind_las_mod_us_f_1k1i3 foreign key (last_modified_user) + references parties(party_id); +alter table trm_domains_indexer add + constraint trm_domain_indexer_key_f_lghsq foreign key (key) + references trm_domains(key); diff --git a/ccm-ldn-terms/sql/ccm-ldn-terms/postgres/upgrade/6.5.1-6.5.2/change-unique_id-to-varchar.sql b/ccm-ldn-terms/sql/ccm-ldn-terms/postgres/upgrade/6.5.1-6.5.2/change-unique_id-to-varchar.sql index 2443b1744..18998f519 100644 --- a/ccm-ldn-terms/sql/ccm-ldn-terms/postgres/upgrade/6.5.1-6.5.2/change-unique_id-to-varchar.sql +++ b/ccm-ldn-terms/sql/ccm-ldn-terms/postgres/upgrade/6.5.1-6.5.2/change-unique_id-to-varchar.sql @@ -1,7 +1,7 @@ -alter table trm_terms add unique_id_string varchar(128) default 'UNKNOWN' not null; -alter table trm_terms rename column unique_id to unique_id_old; -alter table trm_terms rename column unique_id_string to unique_id; -update trm_terms set unique_id = unique_id_old; -alter table trm_terms drop constraint trm_ter_domai_uniqu_id_u_6sito; -alter table trm_terms add constraint trm_ter_domai_uniqu_id_u_6sito unique(domain, unique_id); -alter table trm_terms drop column unique_id_old; +alter table trm_terms add unique_id_string varchar(128) default 'UNKNOWN' not null; +alter table trm_terms rename column unique_id to unique_id_old; +alter table trm_terms rename column unique_id_string to unique_id; +update trm_terms set unique_id = unique_id_old; +alter table trm_terms drop constraint trm_ter_domai_uniqu_id_u_6sito; +alter table trm_terms add constraint trm_ter_domai_uniqu_id_u_6sito unique(domain, unique_id); +alter table trm_terms drop column unique_id_old; diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/Loader.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/Loader.java index 322f568b7..0b9aa19e7 100755 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/Loader.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/Loader.java @@ -28,7 +28,7 @@ import com.arsdigita.web.ApplicationType; * Loader. * * @author Justin Ross <jross@redhat.com> - * @version $Id: Loader.java 287 2005-02-22 00:29:02Z sskracic $ + * @version $Id: Loader.java 1878 2009-04-21 13:56:23Z terry $ */ public class Loader extends PackageLoader { public void run(final ScriptContext ctx) { diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/TermComparators.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/TermComparators.java index 966f03f85..d248adcf2 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/TermComparators.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/TermComparators.java @@ -1,44 +1,44 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms; - -import java.io.Serializable; -import java.util.Comparator; - -/** - * Comparators for sorting {@link Term} objects. - * - * @author terry_permeance - */ -public class TermComparators { - - /** - * Compare two {@link Term} object by name, case insensitive. - */ - public static class OrderByName implements Comparator, Serializable { - public int compare(Term o1, Term o2) { - int compare = o1.getName().toLowerCase().compareTo(o2.getName().toLowerCase()); - if (compare == 0) { - compare = o1.getUniqueID().compareTo(o2.getUniqueID()); - } - return compare; - } - } -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms; + +import java.io.Serializable; +import java.util.Comparator; + +/** + * Comparators for sorting {@link Term} objects. + * + * @author terry_permeance + */ +public class TermComparators { + + /** + * Compare two {@link Term} object by name, case insensitive. + */ + public static class OrderByName implements Comparator, Serializable { + public int compare(Term o1, Term o2) { + int compare = o1.getName().toLowerCase().compareTo(o2.getName().toLowerCase()); + if (compare == 0) { + compare = o1.getUniqueID().compareTo(o2.getUniqueID()); + } + return compare; + } + } +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/Util.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/Util.java index 4858ec42b..3ec5f42f5 100755 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/Util.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/Util.java @@ -5,6 +5,7 @@ package com.arsdigita.london.terms; import com.arsdigita.persistence.DataQuery; +import com.arsdigita.domain.DomainCollection; import com.arsdigita.persistence.SessionManager; import com.arsdigita.util.UncheckedWrapperException; import com.arsdigita.web.Application; @@ -33,5 +34,24 @@ public class Util { return applicationDomain; } + + + /** + * retrieve a unique integer to allocate to a new term. + * Useful for applications that dynamically generate terms. + */ + + public static String getNextTermID(Domain domain) { + + DomainCollection terms = domain.getTerms(); + terms.addOrder(Term.UNIQUE_ID + " desc"); + int id = 1; + if(terms.next()) { + Term other = (Term) terms.getDomainObject(); + id = Integer.parseInt(other.getUniqueID()) + 1; + terms.close(); + } + return Integer.toString(id); + } } diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/DomainImportTool.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/DomainImportTool.java index 2a0ec42cf..46786af9f 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/DomainImportTool.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/DomainImportTool.java @@ -1,85 +1,85 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.importer.skos; - -import java.sql.Date; - -import org.apache.commons.cli.CommandLine; -import org.apache.log4j.Logger; - -import com.arsdigita.kernel.Kernel; -import com.arsdigita.kernel.KernelExcursion; -import com.arsdigita.london.util.Transaction; -import com.arsdigita.packaging.Program; -import com.arsdigita.util.WrappedError; - -/** - * A tool for importing domains from an SKOS file. - * - * @author terry_permeance - */ -public class DomainImportTool extends Program { - public DomainImportTool() { - super("Domain importer", "1.0.0", "skos-filename.rdf key title description version YYYY-MM-DD"); - } - - protected void doRun(CommandLine cmdLine) { - final String[] args = cmdLine.getArgs(); - if (args.length != 6) { - help(System.err); - System.exit(1); - } - - s_log.info("Importing domain from SKOS file " + args[0]); - - final String key = args[1]; - final String title = args[2]; - final String description = args[3]; - final String version = args[4]; - final Date released = Date.valueOf(args[5]); - - Transaction txn = new Transaction() { - protected void doRun() { - DomainParser parser = new DomainParser(key, title, description, version, released); - parser.parse(args[0]); - } - }; - try { - txn.run(); - } catch (RuntimeException e) { - s_log.error("RDF importer failed unexpectedly", e); - throw e; - } catch (WrappedError e) { - s_log.error("RDF importer failed unexpectedly", e); - throw e; - } - } - - public static void main(final String[] args) { - new KernelExcursion() { - public void excurse() { - setEffectiveParty(Kernel.getSystemParty()); - new DomainImportTool().run(args); - } - }.run(); - } - - private static final Logger s_log = Logger.getLogger(DomainImportTool.class); -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.importer.skos; + +import java.sql.Date; + +import org.apache.commons.cli.CommandLine; +import org.apache.log4j.Logger; + +import com.arsdigita.kernel.Kernel; +import com.arsdigita.kernel.KernelExcursion; +import com.arsdigita.london.util.Transaction; +import com.arsdigita.packaging.Program; +import com.arsdigita.util.WrappedError; + +/** + * A tool for importing domains from an SKOS file. + * + * @author terry_permeance + */ +public class DomainImportTool extends Program { + public DomainImportTool() { + super("Domain importer", "1.0.0", "skos-filename.rdf key title description version YYYY-MM-DD"); + } + + protected void doRun(CommandLine cmdLine) { + final String[] args = cmdLine.getArgs(); + if (args.length != 6) { + help(System.err); + System.exit(1); + } + + s_log.info("Importing domain from SKOS file " + args[0]); + + final String key = args[1]; + final String title = args[2]; + final String description = args[3]; + final String version = args[4]; + final Date released = Date.valueOf(args[5]); + + Transaction txn = new Transaction() { + protected void doRun() { + DomainParser parser = new DomainParser(key, title, description, version, released); + parser.parse(args[0]); + } + }; + try { + txn.run(); + } catch (RuntimeException e) { + s_log.error("RDF importer failed unexpectedly", e); + throw e; + } catch (WrappedError e) { + s_log.error("RDF importer failed unexpectedly", e); + throw e; + } + } + + public static void main(final String[] args) { + new KernelExcursion() { + public void excurse() { + setEffectiveParty(Kernel.getSystemParty()); + new DomainImportTool().run(args); + } + }.run(); + } + + private static final Logger s_log = Logger.getLogger(DomainImportTool.class); +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/DomainParser.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/DomainParser.java index 96338bdf4..b6d18d398 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/DomainParser.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/DomainParser.java @@ -1,95 +1,95 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.importer.skos; - -import java.io.IOException; -import java.net.URL; -import java.sql.Date; - -import org.apache.commons.digester.Digester; -import org.apache.commons.digester.Rule; -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; - -import com.arsdigita.util.UncheckedWrapperException; - -/** - * Parses an SKOS-formatted file using a {@link Digester}. - * - * @author terry_permeance - */ -class DomainParser { - public DomainParser(final String key, final String title, final String description, final String version, - final Date released) { - m_digester = new Digester(); - - m_digester.push(this); - m_digester.setNamespaceAware(true); - m_digester.setRuleNamespaceURI(Namespaces.SKOS); - - m_digester.addRule("RDF/ConceptScheme", new Rule() { - public void begin(String namespace, String name, Attributes attrs) throws Exception { - URL url = Utils.extractAbout(attrs); - m_builder.createDomain(key, url, title, description, version, released); - } - }); - - m_digester.addFactoryCreate("RDF/Concept", m_builder.newPendingTermFactory()); - - m_digester.addCallMethod("RDF/Concept/prefLabel", "setPreferredLabel", 1, new Class[] { String.class }); - m_digester.addCallParam("RDF/Concept/prefLabel", 0); - - m_digester.addCallMethod("RDF/Concept/altLabel", "addAlternateLabel", 1, new Class[] { String.class }); - m_digester.addCallParam("RDF/Concept/altLabel", 0); - - m_digester.addCallMethod("RDF/Concept/broader", "addBroaderTerm", 1, new Class[] { String.class }); - m_digester.addCallParam("RDF/Concept/broader", 0, "rdf:resource"); - - m_digester.addCallMethod("RDF/Concept/broader/Concept", "addBroaderTerm", 1, new Class[] { String.class }); - m_digester.addCallParam("RDF/Concept/broader/Concept", 0, "rdf:about"); - - m_digester.addCallMethod("RDF/Concept/related", "addRelatedTerm", 1, new Class[] { String.class }); - m_digester.addCallParam("RDF/Concept/related", 0, "rdf:resource"); - - m_digester.addCallMethod("RDF/Concept/related/Concept", "addRelatedTerm", 1, new Class[] { String.class }); - m_digester.addCallParam("RDF/Concept/related/Concept", 0, "rdf:about"); - - m_digester.addCallMethod("RDF/Concept/narrower", "addNarrowerTerm", 1, new Class[] { String.class }); - m_digester.addCallParam("RDF/Concept/narrower", 0, "rdf:resource"); - - m_digester.addCallMethod("RDF/Concept/narrower/Concept", "addNarrowerTerm", 1, new Class[] { String.class }); - m_digester.addCallParam("RDF/Concept/narrower/Concept", 0, "rdf:about"); - } - - public void parse(String file) { - try { - m_digester.parse(file); - m_builder.build(); - } catch (IOException ex) { - throw new UncheckedWrapperException("cannot parse " + file, ex); - } catch (SAXException ex) { - throw new UncheckedWrapperException("cannot parse " + file, ex); - } - } - - private final DomainBuilder m_builder = new DomainBuilder(); - - private final Digester m_digester; -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.importer.skos; + +import java.io.IOException; +import java.net.URL; +import java.sql.Date; + +import org.apache.commons.digester.Digester; +import org.apache.commons.digester.Rule; +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; + +import com.arsdigita.util.UncheckedWrapperException; + +/** + * Parses an SKOS-formatted file using a {@link Digester}. + * + * @author terry_permeance + */ +class DomainParser { + public DomainParser(final String key, final String title, final String description, final String version, + final Date released) { + m_digester = new Digester(); + + m_digester.push(this); + m_digester.setNamespaceAware(true); + m_digester.setRuleNamespaceURI(Namespaces.SKOS); + + m_digester.addRule("RDF/ConceptScheme", new Rule() { + public void begin(String namespace, String name, Attributes attrs) throws Exception { + URL url = Utils.extractAbout(attrs); + m_builder.createDomain(key, url, title, description, version, released); + } + }); + + m_digester.addFactoryCreate("RDF/Concept", m_builder.newPendingTermFactory()); + + m_digester.addCallMethod("RDF/Concept/prefLabel", "setPreferredLabel", 1, new Class[] { String.class }); + m_digester.addCallParam("RDF/Concept/prefLabel", 0); + + m_digester.addCallMethod("RDF/Concept/altLabel", "addAlternateLabel", 1, new Class[] { String.class }); + m_digester.addCallParam("RDF/Concept/altLabel", 0); + + m_digester.addCallMethod("RDF/Concept/broader", "addBroaderTerm", 1, new Class[] { String.class }); + m_digester.addCallParam("RDF/Concept/broader", 0, "rdf:resource"); + + m_digester.addCallMethod("RDF/Concept/broader/Concept", "addBroaderTerm", 1, new Class[] { String.class }); + m_digester.addCallParam("RDF/Concept/broader/Concept", 0, "rdf:about"); + + m_digester.addCallMethod("RDF/Concept/related", "addRelatedTerm", 1, new Class[] { String.class }); + m_digester.addCallParam("RDF/Concept/related", 0, "rdf:resource"); + + m_digester.addCallMethod("RDF/Concept/related/Concept", "addRelatedTerm", 1, new Class[] { String.class }); + m_digester.addCallParam("RDF/Concept/related/Concept", 0, "rdf:about"); + + m_digester.addCallMethod("RDF/Concept/narrower", "addNarrowerTerm", 1, new Class[] { String.class }); + m_digester.addCallParam("RDF/Concept/narrower", 0, "rdf:resource"); + + m_digester.addCallMethod("RDF/Concept/narrower/Concept", "addNarrowerTerm", 1, new Class[] { String.class }); + m_digester.addCallParam("RDF/Concept/narrower/Concept", 0, "rdf:about"); + } + + public void parse(String file) { + try { + m_digester.parse(file); + m_builder.build(); + } catch (IOException ex) { + throw new UncheckedWrapperException("cannot parse " + file, ex); + } catch (SAXException ex) { + throw new UncheckedWrapperException("cannot parse " + file, ex); + } + } + + private final DomainBuilder m_builder = new DomainBuilder(); + + private final Digester m_digester; +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/Namespaces.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/Namespaces.java index 597715fd5..bd1cbe875 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/Namespaces.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/Namespaces.java @@ -1,31 +1,31 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.importer.skos; - -/** - * Declares the namespaces for SKOS and RDF. - * - * @author terry_permeance - */ -final class Namespaces { - public static final String RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; - - public static final String SKOS = "http://www.w3.org/2004/02/skos/core#"; -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.importer.skos; + +/** + * Declares the namespaces for SKOS and RDF. + * + * @author terry_permeance + */ +final class Namespaces { + public static final String RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + + public static final String SKOS = "http://www.w3.org/2004/02/skos/core#"; +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/ProgressBar.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/ProgressBar.java index 8d28044f4..2ad738a94 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/ProgressBar.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/ProgressBar.java @@ -1,65 +1,65 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.importer.skos; - -import org.apache.log4j.Logger; - -import com.arsdigita.util.Assert; - -/** - * A simple progress bar that outputs 0%, 5%, 10%, using a specified logger. - * - * @author terry_permeance - */ -class ProgressBar { - public ProgressBar(int length, Logger logger) { - Assert.isTrue(length > 0); - Assert.isTrue(logger != null); - - m_length = length; - m_logger = logger; - m_position = 0; - m_percentage = -1; - } - - public void next() { - m_position++; - - int percentage = (m_position * 100) / m_length; - - if (percentage != m_percentage && percentage % 5 == 0) { - m_percentage = percentage; - m_logger.info(" " + m_percentage + "% (" + m_position + ")"); - } - } - - public void reset() { - m_position = 0; - m_percentage = -1; - } - - private int m_position; - - private int m_percentage; - - private final int m_length; - - private final Logger m_logger; -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.importer.skos; + +import org.apache.log4j.Logger; + +import com.arsdigita.util.Assert; + +/** + * A simple progress bar that outputs 0%, 5%, 10%, using a specified logger. + * + * @author terry_permeance + */ +class ProgressBar { + public ProgressBar(int length, Logger logger) { + Assert.isTrue(length > 0); + Assert.isTrue(logger != null); + + m_length = length; + m_logger = logger; + m_position = 0; + m_percentage = -1; + } + + public void next() { + m_position++; + + int percentage = (m_position * 100) / m_length; + + if (percentage != m_percentage && percentage % 5 == 0) { + m_percentage = percentage; + m_logger.info(" " + m_percentage + "% (" + m_position + ")"); + } + } + + public void reset() { + m_position = 0; + m_percentage = -1; + } + + private int m_position; + + private int m_percentage; + + private final int m_length; + + private final Logger m_logger; +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/TermBuilder.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/TermBuilder.java index 59c294d2d..63e848994 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/TermBuilder.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/TermBuilder.java @@ -1,175 +1,175 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.importer.skos; - -import java.net.MalformedURLException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -import org.apache.log4j.Logger; - -import com.arsdigita.london.terms.Domain; -import com.arsdigita.london.terms.Term; -import com.arsdigita.persistence.SessionManager; -import com.arsdigita.util.Assert; - -/** - * Builds a polyhierarchy of {@link Term} objects. - * - * @author terry_permeance - */ -class TermBuilder { - private static final Logger s_log = Logger.getLogger(TermBuilder.class); - - public TermBuilder(URL url) { - Assert.exists(url); - m_url = url; - m_uniqueID = Utils.extractUniqueID(url); - m_preferredLabel = m_uniqueID; - - if (s_log.isDebugEnabled()) { - s_log.debug("Pending term is " + m_url + " with unique ID " + m_uniqueID); - } - } - - public void setPreferredLabel(String preferredLabel) { - m_preferredLabel = preferredLabel; - if (s_log.isDebugEnabled()) { - s_log.debug("--> Preferred label is " + m_preferredLabel); - } - } - - public void addAlternateLabel(String alternateLabel) { - m_alternateLabels.add(alternateLabel); - if (s_log.isDebugEnabled()) { - s_log.debug("--> An alternate label is " + alternateLabel); - } - } - - public void addBroaderTerm(String url) throws MalformedURLException { - m_broaderTerms.add(new URL(url)); - if (s_log.isDebugEnabled()) { - s_log.debug("--> Broader term is " + url); - } - } - - public void addRelatedTerm(String url) throws MalformedURLException { - m_relatedTerms.add(new URL(url)); - if (s_log.isDebugEnabled()) { - s_log.debug("--> Related term is " + url); - } - } - - public void addNarrowerTerm(String url) throws MalformedURLException { - m_narrowerTerms.add(new URL(url)); - if (s_log.isDebugEnabled()) { - s_log.debug("--> Narrower term is " + url); - } - } - - public void buildTerm(Domain domain, Map termCache) { - Assert.exists(domain); - Assert.exists(termCache); - Assert.exists(m_preferredLabel); - Assert.exists(m_uniqueID); - - if (m_preferredLabel.equals(m_uniqueID)) { - s_log.warn("Preferred label was missing for concept " + m_uniqueID); - } - - final boolean inAtoZ = false; - final String name = m_preferredLabel; - final String shortcut = null; - - Term term = (Term) termCache.get(m_uniqueID); - - if (term != null) { - term.setName(name); - term.setInAtoZ(inAtoZ); - term.setShortcut(shortcut); - } else { - term = Term.create(m_uniqueID, name, inAtoZ, shortcut, domain); - termCache.put(term.getUniqueID(), term); - } - term.save(); - } - - public void buildPolyhierarchy(Domain domain, Map termCache) { - Term thisTerm = (Term) termCache.get(m_uniqueID); - - if (m_broaderTerms.isEmpty()) { - domain.addRootTerm(thisTerm); - } - - for (Iterator i = m_broaderTerms.iterator(); i.hasNext();) { - URL url = i.next(); - String uniqueID = Utils.extractUniqueID(url); - Term targetTerm = (Term) termCache.get(uniqueID); - if (targetTerm == null) { - s_log.warn("Narrower term " + uniqueID + " (" + url + ") does not exist"); - } else { - targetTerm.addNarrowerTerm(thisTerm, true, true); - } - } - - for (Iterator i = m_relatedTerms.iterator(); i.hasNext();) { - URL url = i.next(); - String uniqueID = Utils.extractUniqueID(url); - Term targetTerm = (Term) termCache.get(uniqueID); - if (targetTerm == null) { - s_log.warn("Related term " + uniqueID + " (" + url + ") does not exist"); - } else { - thisTerm.addRelatedTerm(targetTerm); - } - } - - for (Iterator i = m_alternateLabels.iterator(); i.hasNext();) { - String alternateLabel = i.next(); - String uniqueID = String.valueOf(s_next_synonym_id--); - Term targetTerm = Term.create(uniqueID, alternateLabel, false, null, domain); - targetTerm.addPreferredTerm(thisTerm); - } - - SessionManager.getSession().flushAll(); - } - - public URL getURL() { - return m_url; - } - - private final URL m_url; - - private final String m_uniqueID; - - private String m_preferredLabel; - - private final List m_alternateLabels = new ArrayList(); - - private final List m_broaderTerms = new ArrayList(); - - private final List m_relatedTerms = new ArrayList(); - - private final List m_narrowerTerms = new ArrayList(); - - private static int s_next_synonym_id = Integer.MAX_VALUE; -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.importer.skos; + +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.log4j.Logger; + +import com.arsdigita.london.terms.Domain; +import com.arsdigita.london.terms.Term; +import com.arsdigita.persistence.SessionManager; +import com.arsdigita.util.Assert; + +/** + * Builds a polyhierarchy of {@link Term} objects. + * + * @author terry_permeance + */ +class TermBuilder { + private static final Logger s_log = Logger.getLogger(TermBuilder.class); + + public TermBuilder(URL url) { + Assert.exists(url); + m_url = url; + m_uniqueID = Utils.extractUniqueID(url); + m_preferredLabel = m_uniqueID; + + if (s_log.isDebugEnabled()) { + s_log.debug("Pending term is " + m_url + " with unique ID " + m_uniqueID); + } + } + + public void setPreferredLabel(String preferredLabel) { + m_preferredLabel = preferredLabel; + if (s_log.isDebugEnabled()) { + s_log.debug("--> Preferred label is " + m_preferredLabel); + } + } + + public void addAlternateLabel(String alternateLabel) { + m_alternateLabels.add(alternateLabel); + if (s_log.isDebugEnabled()) { + s_log.debug("--> An alternate label is " + alternateLabel); + } + } + + public void addBroaderTerm(String url) throws MalformedURLException { + m_broaderTerms.add(new URL(url)); + if (s_log.isDebugEnabled()) { + s_log.debug("--> Broader term is " + url); + } + } + + public void addRelatedTerm(String url) throws MalformedURLException { + m_relatedTerms.add(new URL(url)); + if (s_log.isDebugEnabled()) { + s_log.debug("--> Related term is " + url); + } + } + + public void addNarrowerTerm(String url) throws MalformedURLException { + m_narrowerTerms.add(new URL(url)); + if (s_log.isDebugEnabled()) { + s_log.debug("--> Narrower term is " + url); + } + } + + public void buildTerm(Domain domain, Map termCache) { + Assert.exists(domain); + Assert.exists(termCache); + Assert.exists(m_preferredLabel); + Assert.exists(m_uniqueID); + + if (m_preferredLabel.equals(m_uniqueID)) { + s_log.warn("Preferred label was missing for concept " + m_uniqueID); + } + + final boolean inAtoZ = false; + final String name = m_preferredLabel; + final String shortcut = null; + + Term term = (Term) termCache.get(m_uniqueID); + + if (term != null) { + term.setName(name); + term.setInAtoZ(inAtoZ); + term.setShortcut(shortcut); + } else { + term = Term.create(m_uniqueID, name, inAtoZ, shortcut, domain); + termCache.put(term.getUniqueID(), term); + } + term.save(); + } + + public void buildPolyhierarchy(Domain domain, Map termCache) { + Term thisTerm = (Term) termCache.get(m_uniqueID); + + if (m_broaderTerms.isEmpty()) { + domain.addRootTerm(thisTerm); + } + + for (Iterator i = m_broaderTerms.iterator(); i.hasNext();) { + URL url = i.next(); + String uniqueID = Utils.extractUniqueID(url); + Term targetTerm = (Term) termCache.get(uniqueID); + if (targetTerm == null) { + s_log.warn("Narrower term " + uniqueID + " (" + url + ") does not exist"); + } else { + targetTerm.addNarrowerTerm(thisTerm, true, true); + } + } + + for (Iterator i = m_relatedTerms.iterator(); i.hasNext();) { + URL url = i.next(); + String uniqueID = Utils.extractUniqueID(url); + Term targetTerm = (Term) termCache.get(uniqueID); + if (targetTerm == null) { + s_log.warn("Related term " + uniqueID + " (" + url + ") does not exist"); + } else { + thisTerm.addRelatedTerm(targetTerm); + } + } + + for (Iterator i = m_alternateLabels.iterator(); i.hasNext();) { + String alternateLabel = i.next(); + String uniqueID = String.valueOf(s_next_synonym_id--); + Term targetTerm = Term.create(uniqueID, alternateLabel, false, null, domain); + targetTerm.addPreferredTerm(thisTerm); + } + + SessionManager.getSession().flushAll(); + } + + public URL getURL() { + return m_url; + } + + private final URL m_url; + + private final String m_uniqueID; + + private String m_preferredLabel; + + private final List m_alternateLabels = new ArrayList(); + + private final List m_broaderTerms = new ArrayList(); + + private final List m_relatedTerms = new ArrayList(); + + private final List m_narrowerTerms = new ArrayList(); + + private static int s_next_synonym_id = Integer.MAX_VALUE; +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/Utils.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/Utils.java index 9509339eb..d07e95f17 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/Utils.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/importer/skos/Utils.java @@ -1,79 +1,79 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.importer.skos; - -import java.net.MalformedURLException; -import java.net.URL; - -import org.xml.sax.Attributes; - -import com.arsdigita.util.StringUtils; - -/** - * Utilities for parsing SKOS files. - * - * @author terry_permeance - */ -class Utils { - /** - * Extract the rdf:about attribute from the attributes of an element. - * - * @param attrs the attributes containing the rdf:about attribute. - * - * @return the URL for the rdf:about - * - * @throws MalformedURLException if the rdf:about attribute is malformed - */ - public static URL extractAbout(Attributes attrs) throws MalformedURLException { - return new URL(attrs.getValue(Namespaces.RDF, "about")); - } - - /** - * Extract the unique ID from the URL. - * - *

Examples:

- *
    - *
  • http://www.fao.org/aos/agrovoc#c_3 » c_3 - *
  • http://iaaa.cps.unizar.es/thesaurus/T5_INFORMATION AND COMMUNICATION » T5_INFORMATION AND COMMUNICATION - *
  • http://www.eionet.eu.int/gemet/concept/3395 » 3395 - *
- * - * @param url the URL identifying the term - * - * @return the unique ID - */ - public static String extractUniqueID(URL url) { - String uniqueID = null; - - if (!StringUtils.emptyString(url.getRef())) { - uniqueID = url.getRef(); - } else { - uniqueID = url.toExternalForm(); - while (uniqueID.endsWith("/")) { - uniqueID = uniqueID.substring(0, uniqueID.length() - 1); - } - int lastSlashIndex = uniqueID.lastIndexOf('/'); - if (lastSlashIndex > 0) { - uniqueID = uniqueID.substring(lastSlashIndex + 1); - } - } - return uniqueID; - } -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.importer.skos; + +import java.net.MalformedURLException; +import java.net.URL; + +import org.xml.sax.Attributes; + +import com.arsdigita.util.StringUtils; + +/** + * Utilities for parsing SKOS files. + * + * @author terry_permeance + */ +class Utils { + /** + * Extract the rdf:about attribute from the attributes of an element. + * + * @param attrs the attributes containing the rdf:about attribute. + * + * @return the URL for the rdf:about + * + * @throws MalformedURLException if the rdf:about attribute is malformed + */ + public static URL extractAbout(Attributes attrs) throws MalformedURLException { + return new URL(attrs.getValue(Namespaces.RDF, "about")); + } + + /** + * Extract the unique ID from the URL. + * + *

Examples:

+ *
    + *
  • http://www.fao.org/aos/agrovoc#c_3 » c_3 + *
  • http://iaaa.cps.unizar.es/thesaurus/T5_INFORMATION AND COMMUNICATION » T5_INFORMATION AND COMMUNICATION + *
  • http://www.eionet.eu.int/gemet/concept/3395 » 3395 + *
+ * + * @param url the URL identifying the term + * + * @return the unique ID + */ + public static String extractUniqueID(URL url) { + String uniqueID = null; + + if (!StringUtils.emptyString(url.getRef())) { + uniqueID = url.getRef(); + } else { + uniqueID = url.toExternalForm(); + while (uniqueID.endsWith("/")) { + uniqueID = uniqueID.substring(0, uniqueID.length() - 1); + } + int lastSlashIndex = uniqueID.lastIndexOf('/'); + if (lastSlashIndex > 0) { + uniqueID = uniqueID.substring(lastSlashIndex + 1); + } + } + return uniqueID; + } +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/RankedTerm.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/RankedTerm.java index d336f66aa..4b10803eb 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/RankedTerm.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/RankedTerm.java @@ -1,52 +1,52 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.indexing; - -import java.math.BigDecimal; - -import com.arsdigita.london.terms.Term; -import com.arsdigita.util.Assert; - -/** - * @author terry_permeance - */ -public class RankedTerm { - - private final Term m_term; - - private final BigDecimal m_ranking; - - public RankedTerm(Term term, BigDecimal ranking) { - Assert.exists(term); - Assert.exists(ranking); - Assert.isTrue(ranking.compareTo(BigDecimal.ONE) < 0); - Assert.isTrue(ranking.compareTo(BigDecimal.ZERO) > 0); - m_term = term; - m_ranking = ranking.setScale(4, BigDecimal.ROUND_HALF_DOWN); - } - - public Term getTerm() { - return m_term; - } - - public BigDecimal getRanking() { - return m_ranking; - } -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.indexing; + +import java.math.BigDecimal; + +import com.arsdigita.london.terms.Term; +import com.arsdigita.util.Assert; + +/** + * @author terry_permeance + */ +public class RankedTerm { + + private final Term m_term; + + private final BigDecimal m_ranking; + + public RankedTerm(Term term, BigDecimal ranking) { + Assert.exists(term); + Assert.exists(ranking); + Assert.isTrue(ranking.compareTo(BigDecimal.ONE) < 0); + Assert.isTrue(ranking.compareTo(BigDecimal.ZERO) > 0); + m_term = term; + m_ranking = ranking.setScale(4, BigDecimal.ROUND_HALF_DOWN); + } + + public Term getTerm() { + return m_term; + } + + public BigDecimal getRanking() { + return m_ranking; + } +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/FilterBuilder.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/FilterBuilder.java index 31491307b..922b641b5 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/FilterBuilder.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/FilterBuilder.java @@ -1,150 +1,150 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.indexing.kea; - -import kea.filters.KEAFilter; -import kea.stemmers.PorterStemmer; - -import org.apache.log4j.Logger; - -import weka.core.Attribute; -import weka.core.FastVector; -import weka.core.Instance; -import weka.core.Instances; - -import com.arsdigita.cms.ContentItem; -import com.arsdigita.domain.DomainCollection; -import com.arsdigita.london.terms.Domain; -import com.arsdigita.london.terms.Term; -import com.arsdigita.london.terms.indexing.IndexingConfig; -import com.arsdigita.search.ContentProvider; -import com.arsdigita.search.ContentType; -import com.arsdigita.search.MetadataProvider; -import com.arsdigita.search.MetadataProviderRegistry; -import com.arsdigita.util.Assert; - -/** - * @author terry_permeance - */ -class FilterBuilder { - private static final Logger s_log = Logger.getLogger(FilterBuilder.class); - - private final Domain m_domain; - - private final String m_language; - - private final IndexingConfig m_config = IndexingConfig.getInstance(); - - public FilterBuilder(Domain domain, String language) { - Assert.exists(domain); - Assert.exists(language); - - m_domain = domain; - m_language = language; - } - - public KEAFilter build() { - - try { - FastVector atts = new FastVector(2); - atts.addElement(new Attribute("doc", (FastVector) null)); - atts.addElement(new Attribute("keyphrases", (FastVector) null)); - Instances data = new Instances("keyphrase_training_data", atts, 0); - - // Build model - KEAFilter filter = new KEAFilter(); - - filter.setDebug(false); - filter.setDisallowInternalPeriods(m_config.disallowInternalPeriods()); - filter.setKFused(m_config.keyphraseFrequencyEnabled()); - filter.setMaxPhraseLength(m_config.getMaxPhraseLength()); - filter.setMinPhraseLength(m_config.getMinPhraseLength()); - filter.setMinNumOccur(m_config.getMinPhraseOccurrences()); - filter.setCheckForProperNouns(m_config.checkForProperNouns()); - filter.setStemmer(new PorterStemmer()); - filter.setDocumentLanguage(m_language); - filter.setVocabulary(m_domain.getKey()); - filter.setVocabularyFormat("aplaws"); - filter.setStopwords(new Stopwords(m_language)); - filter.setInputFormat(data); - filter.setNumFeature(); - filter.m_Vocabulary = VocabularyCache.getVocabulary(m_domain, filter.getDocumentLanguage()); - - s_log.debug("Reading the training content... "); - Queries.TrainingItems items = new Queries.TrainingItems(m_domain, filter.getDocumentLanguage()); - items.setRange(0, m_config.getMaxTrainingItems() + 1); - - try { - while (items.next()) { - ContentItem item = new ContentItem(items.getID()); - - if (s_log.isDebugEnabled()) { - s_log.debug(" --> Reading " + item.getName()); - } - - double[] newInst = new double[2]; - - // Text content - MetadataProvider adapter = MetadataProviderRegistry.findAdapter(item.getObjectType()); - ContentProvider[] content = adapter.getContent(item, ContentType.TEXT); - StringBuffer buf = new StringBuffer(); - for (int i = 0, n = content.length; i < n; i++) { - if (content[i].getType().equals(ContentType.TEXT)) { - buf.append(new String(content[i].getBytes())); - } - } - newInst[0] = (double) data.attribute(0).addStringValue(buf.toString()); - - // Assigned terms - StringBuffer keyStr = new StringBuffer(); - DomainCollection terms = m_domain.getTerms(); - try { - terms.addEqualsFilter("model.childObjects.contentChildren", item.getID()); - - while (terms.next()) { - Term nextTerm = (Term) terms.getDomainObject(); - keyStr.append(nextTerm.getModel().getName().toUpperCase()); - keyStr.append("\n"); - } - } finally { - terms.close(); - } - newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString()); - - // Train - data.add(new Instance(1.0, newInst)); - filter.input(data.instance(0)); - data = data.stringFreeStructure(); - } - } finally { - items.close(); - } - - filter.batchFinished(); - - while ((filter.output()) != null) { - // Nothing to do here! - } - return filter; - } catch (Exception e) { - throw new RuntimeException(e); - } - } -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.indexing.kea; + +import kea.filters.KEAFilter; +import kea.stemmers.PorterStemmer; + +import org.apache.log4j.Logger; + +import weka.core.Attribute; +import weka.core.FastVector; +import weka.core.Instance; +import weka.core.Instances; + +import com.arsdigita.cms.ContentItem; +import com.arsdigita.domain.DomainCollection; +import com.arsdigita.london.terms.Domain; +import com.arsdigita.london.terms.Term; +import com.arsdigita.london.terms.indexing.IndexingConfig; +import com.arsdigita.search.ContentProvider; +import com.arsdigita.search.ContentType; +import com.arsdigita.search.MetadataProvider; +import com.arsdigita.search.MetadataProviderRegistry; +import com.arsdigita.util.Assert; + +/** + * @author terry_permeance + */ +class FilterBuilder { + private static final Logger s_log = Logger.getLogger(FilterBuilder.class); + + private final Domain m_domain; + + private final String m_language; + + private final IndexingConfig m_config = IndexingConfig.getInstance(); + + public FilterBuilder(Domain domain, String language) { + Assert.exists(domain); + Assert.exists(language); + + m_domain = domain; + m_language = language; + } + + public KEAFilter build() { + + try { + FastVector atts = new FastVector(2); + atts.addElement(new Attribute("doc", (FastVector) null)); + atts.addElement(new Attribute("keyphrases", (FastVector) null)); + Instances data = new Instances("keyphrase_training_data", atts, 0); + + // Build model + KEAFilter filter = new KEAFilter(); + + filter.setDebug(false); + filter.setDisallowInternalPeriods(m_config.disallowInternalPeriods()); + filter.setKFused(m_config.keyphraseFrequencyEnabled()); + filter.setMaxPhraseLength(m_config.getMaxPhraseLength()); + filter.setMinPhraseLength(m_config.getMinPhraseLength()); + filter.setMinNumOccur(m_config.getMinPhraseOccurrences()); + filter.setCheckForProperNouns(m_config.checkForProperNouns()); + filter.setStemmer(new PorterStemmer()); + filter.setDocumentLanguage(m_language); + filter.setVocabulary(m_domain.getKey()); + filter.setVocabularyFormat("aplaws"); + filter.setStopwords(new Stopwords(m_language)); + filter.setInputFormat(data); + filter.setNumFeature(); + filter.m_Vocabulary = VocabularyCache.getVocabulary(m_domain, filter.getDocumentLanguage()); + + s_log.debug("Reading the training content... "); + Queries.TrainingItems items = new Queries.TrainingItems(m_domain, filter.getDocumentLanguage()); + items.setRange(0, m_config.getMaxTrainingItems() + 1); + + try { + while (items.next()) { + ContentItem item = new ContentItem(items.getID()); + + if (s_log.isDebugEnabled()) { + s_log.debug(" --> Reading " + item.getName()); + } + + double[] newInst = new double[2]; + + // Text content + MetadataProvider adapter = MetadataProviderRegistry.findAdapter(item.getObjectType()); + ContentProvider[] content = adapter.getContent(item, ContentType.TEXT); + StringBuffer buf = new StringBuffer(); + for (int i = 0, n = content.length; i < n; i++) { + if (content[i].getType().equals(ContentType.TEXT)) { + buf.append(new String(content[i].getBytes())); + } + } + newInst[0] = (double) data.attribute(0).addStringValue(buf.toString()); + + // Assigned terms + StringBuffer keyStr = new StringBuffer(); + DomainCollection terms = m_domain.getTerms(); + try { + terms.addEqualsFilter("model.childObjects.contentChildren", item.getID()); + + while (terms.next()) { + Term nextTerm = (Term) terms.getDomainObject(); + keyStr.append(nextTerm.getModel().getName().toUpperCase()); + keyStr.append("\n"); + } + } finally { + terms.close(); + } + newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString()); + + // Train + data.add(new Instance(1.0, newInst)); + filter.input(data.instance(0)); + data = data.stringFreeStructure(); + } + } finally { + items.close(); + } + + filter.batchFinished(); + + while ((filter.output()) != null) { + // Nothing to do here! + } + return filter; + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/FilterCache.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/FilterCache.java index c01e712a5..a0056a61d 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/FilterCache.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/FilterCache.java @@ -1,61 +1,61 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.indexing.kea; - -import kea.filters.KEAFilter; - -import com.arsdigita.caching.CacheTable; -import com.arsdigita.london.terms.Domain; -import com.arsdigita.util.Assert; - -/** - * @author terry_permeance - */ -class FilterCache { - - static KEAFilter getFilter(Domain domain, String language) { - Assert.exists(domain); - Assert.exists(language); - - String key = domain.getKey() + "_" + language; - KEAFilter filter = (KEAFilter) s_cache.get(key); - if (filter == null) { - FilterBuilder builder = new FilterBuilder(domain, language); - filter = builder.build(); - s_cache.put(key, filter); - } - return filter; - } - - public static KEAFilter recreateFilter(Domain domain, String language) { - Assert.exists(domain); - Assert.exists(language); - - String key = domain.getKey() + "_" + language; - s_cache.remove(key); - return getFilter(domain, language); - } - - public static void reset() { - s_cache.removeAll(); - } - - private static final CacheTable s_cache = new CacheTable("FilterCache", false); -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.indexing.kea; + +import kea.filters.KEAFilter; + +import com.arsdigita.caching.CacheTable; +import com.arsdigita.london.terms.Domain; +import com.arsdigita.util.Assert; + +/** + * @author terry_permeance + */ +class FilterCache { + + static KEAFilter getFilter(Domain domain, String language) { + Assert.exists(domain); + Assert.exists(language); + + String key = domain.getKey() + "_" + language; + KEAFilter filter = (KEAFilter) s_cache.get(key); + if (filter == null) { + FilterBuilder builder = new FilterBuilder(domain, language); + filter = builder.build(); + s_cache.put(key, filter); + } + return filter; + } + + public static KEAFilter recreateFilter(Domain domain, String language) { + Assert.exists(domain); + Assert.exists(language); + + String key = domain.getKey() + "_" + language; + s_cache.remove(key); + return getFilter(domain, language); + } + + public static void reset() { + s_cache.removeAll(); + } + + private static final CacheTable s_cache = new CacheTable("FilterCache", false); +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/IndexerService.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/IndexerService.java index 8614db697..2f2d100d0 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/IndexerService.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/IndexerService.java @@ -1,124 +1,124 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.indexing.kea; - -import java.math.BigDecimal; -import java.util.ArrayList; -import java.util.List; - -import kea.filters.KEAFilter; - -import org.apache.log4j.Logger; - -import weka.core.Attribute; -import weka.core.FastVector; -import weka.core.Instance; -import weka.core.Instances; - -import com.arsdigita.cms.ContentItem; -import com.arsdigita.cms.TextPage; -import com.arsdigita.london.terms.Domain; -import com.arsdigita.london.terms.Term; -import com.arsdigita.london.terms.indexing.Indexer; -import com.arsdigita.london.terms.indexing.RankedTerm; - -/** - * @author terry_permeance - */ -public class IndexerService { - private static final Logger s_log = Logger.getLogger(Indexer.class); - - public List controlledIndex(Object f, Domain domain, int maxTerms, ContentItem item) { - - if (s_log.isDebugEnabled()) { - s_log.debug("Extracting index from " + item); - } - final long t1 = System.currentTimeMillis(); - - KEAFilter filter = (KEAFilter) f; - filter.setNumPhrases(maxTerms); - - FastVector atts = new FastVector(3); - atts.addElement(new Attribute("doc", (FastVector) null)); - atts.addElement(new Attribute("keyphrases", (FastVector) null)); - atts.addElement(new Attribute("filename", (String) null)); - Instances data = new Instances("keyphrase_training_data", atts, 0); - - // Extract keyphrases - StringBuffer txtStr = new StringBuffer(); - txtStr.append(((TextPage) item).getTextAsset().getText()); - - double[] newInst = new double[2]; - newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); - newInst[1] = Instance.missingValue(); - - data.add(new Instance(1.0, newInst)); - try { - filter.input(data.instance(0)); - } catch (Exception e) { - throw new RuntimeException(e); - } - data = data.stringFreeStructure(); - Instance[] topRankedInstances = new Instance[filter.getNumPhrases()]; - Instance inst; - - // Iterating over all extracted keyphrases (inst) - while ((inst = filter.output()) != null) { - int index = (int) inst.value(filter.getRankIndex()) - 1; - if (index < filter.getNumPhrases()) { - topRankedInstances[index] = inst; - } - if (s_log.isDebugEnabled()) { - s_log.debug(inst.toString()); - } - } - - // Extract the unique ID's of the matching keyphrases - List terms = new ArrayList(); - for (int i = 0; i < filter.getNumPhrases(); i++) { - if (topRankedInstances[i] != null) { - String uniqueID = topRankedInstances[i].stringValue(filter.getStemmedPhraseIndex()); - BigDecimal ranking = BigDecimal.valueOf(topRankedInstances[i].value(filter.getProbabilityIndex())); - Term term = domain.getTerm(uniqueID); - terms.add(new RankedTerm(term, ranking)); - } - } - - if (s_log.isDebugEnabled()) { - s_log.debug("Extracted index from " + item + " in " + (System.currentTimeMillis() - t1) + "ms"); - } - return terms; - } - - public Object train(Domain domain, String language) { - final long t1 = System.currentTimeMillis(); - - if (s_log.isDebugEnabled()) { - s_log.debug("Training indexer for domain " + domain.getKey() + "..."); - } - KEAFilter filter = FilterCache.recreateFilter(domain, language); - - if (s_log.isDebugEnabled()) { - s_log.debug("Trained indexer for domain " + domain.getKey() + " in " + (System.currentTimeMillis() - t1) - + "ms"); - } - return filter; - } -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.indexing.kea; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.List; + +import kea.filters.KEAFilter; + +import org.apache.log4j.Logger; + +import weka.core.Attribute; +import weka.core.FastVector; +import weka.core.Instance; +import weka.core.Instances; + +import com.arsdigita.cms.ContentItem; +import com.arsdigita.cms.TextPage; +import com.arsdigita.london.terms.Domain; +import com.arsdigita.london.terms.Term; +import com.arsdigita.london.terms.indexing.Indexer; +import com.arsdigita.london.terms.indexing.RankedTerm; + +/** + * @author terry_permeance + */ +public class IndexerService { + private static final Logger s_log = Logger.getLogger(Indexer.class); + + public List controlledIndex(Object f, Domain domain, int maxTerms, ContentItem item) { + + if (s_log.isDebugEnabled()) { + s_log.debug("Extracting index from " + item); + } + final long t1 = System.currentTimeMillis(); + + KEAFilter filter = (KEAFilter) f; + filter.setNumPhrases(maxTerms); + + FastVector atts = new FastVector(3); + atts.addElement(new Attribute("doc", (FastVector) null)); + atts.addElement(new Attribute("keyphrases", (FastVector) null)); + atts.addElement(new Attribute("filename", (String) null)); + Instances data = new Instances("keyphrase_training_data", atts, 0); + + // Extract keyphrases + StringBuffer txtStr = new StringBuffer(); + txtStr.append(((TextPage) item).getTextAsset().getText()); + + double[] newInst = new double[2]; + newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); + newInst[1] = Instance.missingValue(); + + data.add(new Instance(1.0, newInst)); + try { + filter.input(data.instance(0)); + } catch (Exception e) { + throw new RuntimeException(e); + } + data = data.stringFreeStructure(); + Instance[] topRankedInstances = new Instance[filter.getNumPhrases()]; + Instance inst; + + // Iterating over all extracted keyphrases (inst) + while ((inst = filter.output()) != null) { + int index = (int) inst.value(filter.getRankIndex()) - 1; + if (index < filter.getNumPhrases()) { + topRankedInstances[index] = inst; + } + if (s_log.isDebugEnabled()) { + s_log.debug(inst.toString()); + } + } + + // Extract the unique ID's of the matching keyphrases + List terms = new ArrayList(); + for (int i = 0; i < filter.getNumPhrases(); i++) { + if (topRankedInstances[i] != null) { + String uniqueID = topRankedInstances[i].stringValue(filter.getStemmedPhraseIndex()); + BigDecimal ranking = BigDecimal.valueOf(topRankedInstances[i].value(filter.getProbabilityIndex())); + Term term = domain.getTerm(uniqueID); + terms.add(new RankedTerm(term, ranking)); + } + } + + if (s_log.isDebugEnabled()) { + s_log.debug("Extracted index from " + item + " in " + (System.currentTimeMillis() - t1) + "ms"); + } + return terms; + } + + public Object train(Domain domain, String language) { + final long t1 = System.currentTimeMillis(); + + if (s_log.isDebugEnabled()) { + s_log.debug("Training indexer for domain " + domain.getKey() + "..."); + } + KEAFilter filter = FilterCache.recreateFilter(domain, language); + + if (s_log.isDebugEnabled()) { + s_log.debug("Trained indexer for domain " + domain.getKey() + " in " + (System.currentTimeMillis() - t1) + + "ms"); + } + return filter; + } +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/Queries.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/Queries.java index 8f4374a6c..e666c6a5a 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/Queries.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/Queries.java @@ -1,123 +1,123 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.indexing.kea; - -import java.math.BigDecimal; - -import com.arsdigita.categorization.Category; -import com.arsdigita.cms.ContentItem; -import com.arsdigita.domain.DomainQuery; -import com.arsdigita.london.terms.Domain; -import com.arsdigita.london.terms.Term; - -/** - * Queries for performance optimisation of keyphrase extraction. - * - * @author terry_permeance - */ -class Queries { - - static class TrainingItems extends DomainQuery { - - public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getTrainingItems"; - public static final String ITEM_ID = ContentItem.ID; - - TrainingItems(Domain domain, String language) { - super(QUERY_NAME); - setParameter(Term.DOMAIN, domain.getKey()); - setParameter(ContentItem.LANGUAGE, language); - } - - public BigDecimal getID() { - return (BigDecimal) get(ITEM_ID); - } - } - - static class PreferredTerms extends DomainQuery { - - public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getPreferredTerms"; - public static final String UNIQUE_ID = Term.UNIQUE_ID; - public static final String NAME = Category.NAME; - - PreferredTerms(Domain domain) { - super(QUERY_NAME); - setParameter("domain", domain.getKey()); - } - - public String getUniqueID() { - return String.valueOf(get(UNIQUE_ID)); - } - - public String getName() { - return (String) get(NAME); - } - } - - static class NonPreferredTerms extends DomainQuery { - - public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getNonPreferredTerms"; - public static final String UNIQUE_ID = Term.UNIQUE_ID; - public static final String NAME = Category.NAME; - public static final String PREFERRED_UNIQUE_ID = "preferredUniqueID"; - - NonPreferredTerms(Domain domain) { - super(QUERY_NAME); - setParameter("domain", domain.getKey()); - } - - public String getUniqueID() { - return String.valueOf(get(UNIQUE_ID)); - } - - public String getPreferredUniqueID() { - return String.valueOf(get(PREFERRED_UNIQUE_ID)); - } - - public String getName() { - return (String) get(NAME); - } - } - - static class RelatedTerms extends DomainQuery { - - public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getRelatedTerms"; - public static final String UNIQUE_ID = Term.UNIQUE_ID; - public static final String RELATED_UNIQUE_ID = "relatedUniqueID"; - public static final String RELATION_TYPE = Category.REL_TYPE; - - RelatedTerms(Domain domain) { - super(QUERY_NAME); - setParameter("domain", domain.getKey()); - } - - public String getUniqueID() { - return String.valueOf(get(UNIQUE_ID)); - } - - public String getRelatedUniqueID() { - return String.valueOf(get(RELATED_UNIQUE_ID)); - } - - public String getRelationType() { - return (String) get(RELATION_TYPE); - } - - } -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.indexing.kea; + +import java.math.BigDecimal; + +import com.arsdigita.categorization.Category; +import com.arsdigita.cms.ContentItem; +import com.arsdigita.domain.DomainQuery; +import com.arsdigita.london.terms.Domain; +import com.arsdigita.london.terms.Term; + +/** + * Queries for performance optimisation of keyphrase extraction. + * + * @author terry_permeance + */ +class Queries { + + static class TrainingItems extends DomainQuery { + + public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getTrainingItems"; + public static final String ITEM_ID = ContentItem.ID; + + TrainingItems(Domain domain, String language) { + super(QUERY_NAME); + setParameter(Term.DOMAIN, domain.getKey()); + setParameter(ContentItem.LANGUAGE, language); + } + + public BigDecimal getID() { + return (BigDecimal) get(ITEM_ID); + } + } + + static class PreferredTerms extends DomainQuery { + + public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getPreferredTerms"; + public static final String UNIQUE_ID = Term.UNIQUE_ID; + public static final String NAME = Category.NAME; + + PreferredTerms(Domain domain) { + super(QUERY_NAME); + setParameter("domain", domain.getKey()); + } + + public String getUniqueID() { + return String.valueOf(get(UNIQUE_ID)); + } + + public String getName() { + return (String) get(NAME); + } + } + + static class NonPreferredTerms extends DomainQuery { + + public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getNonPreferredTerms"; + public static final String UNIQUE_ID = Term.UNIQUE_ID; + public static final String NAME = Category.NAME; + public static final String PREFERRED_UNIQUE_ID = "preferredUniqueID"; + + NonPreferredTerms(Domain domain) { + super(QUERY_NAME); + setParameter("domain", domain.getKey()); + } + + public String getUniqueID() { + return String.valueOf(get(UNIQUE_ID)); + } + + public String getPreferredUniqueID() { + return String.valueOf(get(PREFERRED_UNIQUE_ID)); + } + + public String getName() { + return (String) get(NAME); + } + } + + static class RelatedTerms extends DomainQuery { + + public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getRelatedTerms"; + public static final String UNIQUE_ID = Term.UNIQUE_ID; + public static final String RELATED_UNIQUE_ID = "relatedUniqueID"; + public static final String RELATION_TYPE = Category.REL_TYPE; + + RelatedTerms(Domain domain) { + super(QUERY_NAME); + setParameter("domain", domain.getKey()); + } + + public String getUniqueID() { + return String.valueOf(get(UNIQUE_ID)); + } + + public String getRelatedUniqueID() { + return String.valueOf(get(RELATED_UNIQUE_ID)); + } + + public String getRelationType() { + return (String) get(RELATION_TYPE); + } + + } +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/Stopwords.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/Stopwords.java index 78a87bddc..7831feae1 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/Stopwords.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/Stopwords.java @@ -1,57 +1,57 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.indexing.kea; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.URL; -import java.util.HashSet; -import java.util.Set; - -/** - * @author terry_permeance - */ -class Stopwords extends kea.stopwords.Stopwords { - - public Stopwords(String language) throws IOException { - String resource = getClass().getPackage().getName().replace('.', '/') + "/stopwords_" + language + ".txt"; - URL url = getClass().getClassLoader().getResource(resource); - if (url == null) { - throw new IOException("Could not find resource " + resource); - } - BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream())); - String stopWord = null; - m_stopWords = new HashSet(); - try { - while ((stopWord = br.readLine()) != null) { - m_stopWords.add(stopWord); - } - } finally { - br.close(); - } - } - - public boolean isStopword(String str) { - return m_stopWords.contains(str); - } - - private final Set m_stopWords; -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.indexing.kea; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URL; +import java.util.HashSet; +import java.util.Set; + +/** + * @author terry_permeance + */ +class Stopwords extends kea.stopwords.Stopwords { + + public Stopwords(String language) throws IOException { + String resource = getClass().getPackage().getName().replace('.', '/') + "/stopwords_" + language + ".txt"; + URL url = getClass().getClassLoader().getResource(resource); + if (url == null) { + throw new IOException("Could not find resource " + resource); + } + BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream())); + String stopWord = null; + m_stopWords = new HashSet(); + try { + while ((stopWord = br.readLine()) != null) { + m_stopWords.add(stopWord); + } + } finally { + br.close(); + } + } + + public boolean isStopword(String str) { + return m_stopWords.contains(str); + } + + private final Set m_stopWords; +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/VocabularyBuilder.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/VocabularyBuilder.java index 74822abfd..450dc3785 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/VocabularyBuilder.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/VocabularyBuilder.java @@ -1,167 +1,167 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.indexing.kea; - -import java.io.IOException; -import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.HashMap; -import java.util.Map; -import java.util.Vector; - -import kea.stemmers.PorterStemmer; -import kea.vocab.Vocabulary; - -import org.apache.log4j.Logger; - -import com.arsdigita.london.terms.Domain; - -/** - * @author terry_permeance - */ -class VocabularyBuilder { - - private static final Logger s_log = Logger.getLogger(VocabularyBuilder.class); - - private final Domain m_domain; - - private final String m_language; - - public VocabularyBuilder(Domain domain, String language) { - m_domain = domain; - m_language = language; - } - - public Vocabulary build() throws IOException { - s_log.info("Building vocabulary for domain " + m_domain.getKey() + "..."); - - Vocabulary vocabulary = new Vocabulary(m_domain.getKey(), "aplaws", m_language); - vocabulary.setStemmer(new PorterStemmer()); - vocabulary.setStopwords(new Stopwords(m_language)); - - Map vocabularyEN = createMap(vocabulary, "VocabularyEN"); - Map vocabularyENrev = createMap(vocabulary, "VocabularyENrev"); - Map vocabularyREL = createMap(vocabulary, "VocabularyREL"); - Map vocabularyRT = createMap(vocabulary, "VocabularyRT"); - createMap(vocabulary, "VocabularyUSE"); - - Queries.PreferredTerms preferredTerms = new Queries.PreferredTerms(m_domain); - try { - while (preferredTerms.next()) { - String id = preferredTerms.getUniqueID(); - String descriptor = preferredTerms.getName(); - String avterm = vocabulary.pseudoPhrase(descriptor); - if (avterm == null) { - avterm = descriptor; - } - if (avterm.length() > 1) { - vocabularyEN.put(avterm, id); - vocabularyENrev.put(id, descriptor); - } - } - if (s_log.isDebugEnabled()) { - s_log.debug(" --> Built " + vocabularyEN.size() + " preferred terms"); - } - } finally { - preferredTerms.close(); - } - - Queries.NonPreferredTerms nonPreferredTerms = new Queries.NonPreferredTerms(m_domain); - int count = 1; - try { - while (nonPreferredTerms.next()) { - String preferred_id = nonPreferredTerms.getPreferredUniqueID(); - String descriptor = nonPreferredTerms.getName(); - addNonDescriptor(vocabulary, count++, preferred_id, descriptor); - } - if (s_log.isDebugEnabled()) { - s_log.debug(" --> Built " + count + " non-preferred terms"); - } - } finally { - preferredTerms.close(); - } - - Queries.RelatedTerms relatedTerms = new Queries.RelatedTerms(m_domain); - try { - while (relatedTerms.next()) { - String id = relatedTerms.getUniqueID(); - String relationType = relatedTerms.getRelationType(); - String id_related = relatedTerms.getRelatedUniqueID(); - - Vector relatedIds = (Vector) vocabularyREL.get(id); - if (relatedIds == null) { - relatedIds = new Vector(); - vocabularyREL.put(id, relatedIds); - } - relatedIds.add(id_related); - - if ("child".equals(relationType)) { - vocabularyRT.put(id + "-" + id_related, "narrower"); - vocabularyRT.put(id_related + "-" + id, "broader"); - } else { - vocabularyRT.put(id + "-" + id_related, "related"); - vocabularyRT.put(id_related + "-" + id, "related"); - } - } - if (s_log.isDebugEnabled()) { - s_log.debug(" --> Built " + vocabularyRT.size() + " relationships"); - } - } finally { - preferredTerms.close(); - } - s_log.info("Built vocabulary for domain " + m_domain.getKey()); - return vocabulary; - } - - private Map createMap(Vocabulary vocabulary, String fieldName) { - try { - Map map = new HashMap(106033); - Field field = vocabulary.getClass().getDeclaredField(fieldName); - field.setAccessible(true); - field.set(vocabulary, map); - return map; - } catch (NoSuchFieldException e) { - throw new RuntimeException(e); - } catch (SecurityException e) { - throw new RuntimeException(e); - } catch (IllegalAccessException e) { - throw new RuntimeException(e); - } - } - - private void addNonDescriptor(Vocabulary vocabulary, int count, String id_descriptor, String non_descriptor) { - - try { - Method addNonDescriptor = vocabulary.getClass().getDeclaredMethod("addNonDescriptor", - new Class[] { Integer.TYPE, String.class, String.class }); - addNonDescriptor.setAccessible(true); - addNonDescriptor.invoke(vocabulary, new Object[] { Integer.valueOf(count), id_descriptor, non_descriptor }); - } catch (SecurityException e) { - throw new RuntimeException(e); - } catch (NoSuchMethodException e) { - throw new RuntimeException(e); - } catch (IllegalAccessException e) { - throw new RuntimeException(e); - } catch (InvocationTargetException e) { - throw new RuntimeException(e); - } - } -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.indexing.kea; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.Map; +import java.util.Vector; + +import kea.stemmers.PorterStemmer; +import kea.vocab.Vocabulary; + +import org.apache.log4j.Logger; + +import com.arsdigita.london.terms.Domain; + +/** + * @author terry_permeance + */ +class VocabularyBuilder { + + private static final Logger s_log = Logger.getLogger(VocabularyBuilder.class); + + private final Domain m_domain; + + private final String m_language; + + public VocabularyBuilder(Domain domain, String language) { + m_domain = domain; + m_language = language; + } + + public Vocabulary build() throws IOException { + s_log.info("Building vocabulary for domain " + m_domain.getKey() + "..."); + + Vocabulary vocabulary = new Vocabulary(m_domain.getKey(), "aplaws", m_language); + vocabulary.setStemmer(new PorterStemmer()); + vocabulary.setStopwords(new Stopwords(m_language)); + + Map vocabularyEN = createMap(vocabulary, "VocabularyEN"); + Map vocabularyENrev = createMap(vocabulary, "VocabularyENrev"); + Map vocabularyREL = createMap(vocabulary, "VocabularyREL"); + Map vocabularyRT = createMap(vocabulary, "VocabularyRT"); + createMap(vocabulary, "VocabularyUSE"); + + Queries.PreferredTerms preferredTerms = new Queries.PreferredTerms(m_domain); + try { + while (preferredTerms.next()) { + String id = preferredTerms.getUniqueID(); + String descriptor = preferredTerms.getName(); + String avterm = vocabulary.pseudoPhrase(descriptor); + if (avterm == null) { + avterm = descriptor; + } + if (avterm.length() > 1) { + vocabularyEN.put(avterm, id); + vocabularyENrev.put(id, descriptor); + } + } + if (s_log.isDebugEnabled()) { + s_log.debug(" --> Built " + vocabularyEN.size() + " preferred terms"); + } + } finally { + preferredTerms.close(); + } + + Queries.NonPreferredTerms nonPreferredTerms = new Queries.NonPreferredTerms(m_domain); + int count = 1; + try { + while (nonPreferredTerms.next()) { + String preferred_id = nonPreferredTerms.getPreferredUniqueID(); + String descriptor = nonPreferredTerms.getName(); + addNonDescriptor(vocabulary, count++, preferred_id, descriptor); + } + if (s_log.isDebugEnabled()) { + s_log.debug(" --> Built " + count + " non-preferred terms"); + } + } finally { + preferredTerms.close(); + } + + Queries.RelatedTerms relatedTerms = new Queries.RelatedTerms(m_domain); + try { + while (relatedTerms.next()) { + String id = relatedTerms.getUniqueID(); + String relationType = relatedTerms.getRelationType(); + String id_related = relatedTerms.getRelatedUniqueID(); + + Vector relatedIds = (Vector) vocabularyREL.get(id); + if (relatedIds == null) { + relatedIds = new Vector(); + vocabularyREL.put(id, relatedIds); + } + relatedIds.add(id_related); + + if ("child".equals(relationType)) { + vocabularyRT.put(id + "-" + id_related, "narrower"); + vocabularyRT.put(id_related + "-" + id, "broader"); + } else { + vocabularyRT.put(id + "-" + id_related, "related"); + vocabularyRT.put(id_related + "-" + id, "related"); + } + } + if (s_log.isDebugEnabled()) { + s_log.debug(" --> Built " + vocabularyRT.size() + " relationships"); + } + } finally { + preferredTerms.close(); + } + s_log.info("Built vocabulary for domain " + m_domain.getKey()); + return vocabulary; + } + + private Map createMap(Vocabulary vocabulary, String fieldName) { + try { + Map map = new HashMap(106033); + Field field = vocabulary.getClass().getDeclaredField(fieldName); + field.setAccessible(true); + field.set(vocabulary, map); + return map; + } catch (NoSuchFieldException e) { + throw new RuntimeException(e); + } catch (SecurityException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + private void addNonDescriptor(Vocabulary vocabulary, int count, String id_descriptor, String non_descriptor) { + + try { + Method addNonDescriptor = vocabulary.getClass().getDeclaredMethod("addNonDescriptor", + new Class[] { Integer.TYPE, String.class, String.class }); + addNonDescriptor.setAccessible(true); + addNonDescriptor.invoke(vocabulary, new Object[] { Integer.valueOf(count), id_descriptor, non_descriptor }); + } catch (SecurityException e) { + throw new RuntimeException(e); + } catch (NoSuchMethodException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (InvocationTargetException e) { + throw new RuntimeException(e); + } + } +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/VocabularyCache.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/VocabularyCache.java index 7b51d6a0a..996ac8bd1 100644 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/VocabularyCache.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/indexing/kea/VocabularyCache.java @@ -1,50 +1,50 @@ -/* - * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. - * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -package com.arsdigita.london.terms.indexing.kea; - -import java.io.IOException; - -import kea.vocab.Vocabulary; - -import com.arsdigita.caching.CacheTable; -import com.arsdigita.london.terms.Domain; - -/** - * @author terry_permeance - */ -class VocabularyCache { - - public static Vocabulary getVocabulary(Domain domain, String language) throws IOException { - String key = domain.getKey() + "_" + language; - Vocabulary vocabulary = (Vocabulary) s_cache.get(key); - if (vocabulary == null) { - VocabularyBuilder builder = new VocabularyBuilder(domain, language); - vocabulary = builder.build(); - s_cache.put(key, vocabulary); - } - return vocabulary; - } - - public static void reset() { - s_cache.removeAll(); - } - - private static final CacheTable s_cache = new CacheTable("VocabularyCache", false); -} +/* + * Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +package com.arsdigita.london.terms.indexing.kea; + +import java.io.IOException; + +import kea.vocab.Vocabulary; + +import com.arsdigita.caching.CacheTable; +import com.arsdigita.london.terms.Domain; + +/** + * @author terry_permeance + */ +class VocabularyCache { + + public static Vocabulary getVocabulary(Domain domain, String language) throws IOException { + String key = domain.getKey() + "_" + language; + Vocabulary vocabulary = (Vocabulary) s_cache.get(key); + if (vocabulary == null) { + VocabularyBuilder builder = new VocabularyBuilder(domain, language); + vocabulary = builder.build(); + s_cache.put(key, vocabulary); + } + return vocabulary; + } + + public static void reset() { + s_cache.removeAll(); + } + + private static final CacheTable s_cache = new CacheTable("VocabularyCache", false); +} diff --git a/ccm-ldn-terms/src/com/arsdigita/london/terms/ui/admin/TermForm.java b/ccm-ldn-terms/src/com/arsdigita/london/terms/ui/admin/TermForm.java index 906a7fb70..72f2dc789 100755 --- a/ccm-ldn-terms/src/com/arsdigita/london/terms/ui/admin/TermForm.java +++ b/ccm-ldn-terms/src/com/arsdigita/london/terms/ui/admin/TermForm.java @@ -44,6 +44,7 @@ import com.arsdigita.domain.DataObjectNotFoundException; import com.arsdigita.london.terms.Domain; import com.arsdigita.london.terms.Term; import com.arsdigita.london.terms.Terms; +import com.arsdigita.london.terms.Util; import com.arsdigita.london.util.ui.parameters.DomainObjectParameter; import com.arsdigita.util.UncheckedWrapperException; @@ -146,7 +147,7 @@ public class TermForm extends Form { if (term == null) { Domain domain = (Domain)state.getValue(m_domain); - m_uniqueid.setValue(state, null); + m_uniqueid.setValue(state, Util.getNextTermID(domain)); m_name.setValue(state, null); m_desc.setValue(state, null); m_shortcut.setValue(state, null); diff --git a/ccm-ldn-terms/src/kea/stemmers/SpanishStemmerSB.java b/ccm-ldn-terms/src/kea/stemmers/SpanishStemmerSB.java index ae301e56b..8ad7076ca 100644 --- a/ccm-ldn-terms/src/kea/stemmers/SpanishStemmerSB.java +++ b/ccm-ldn-terms/src/kea/stemmers/SpanishStemmerSB.java @@ -1,1180 +1,1180 @@ -package kea.stemmers; - -import org.tartarus.snowball.Among; -import org.tartarus.snowball.SnowballProgram; - -// This file was generated automatically by the Snowball to Java compiler - -/** - * Generated class implementing code defined by a snowball script. - */ -public class SpanishStemmerSB extends SnowballProgram { - - private static final long serialVersionUID = 1L; - - private Among a_0[] = { - new Among ( "", -1, 6, "", this), - new Among ( "\u00E1", 0, 1, "", this), - new Among ( "\u00E9", 0, 2, "", this), - new Among ( "\u00ED", 0, 3, "", this), - new Among ( "\u00F3", 0, 4, "", this), - new Among ( "\u00FA", 0, 5, "", this) - }; - - private Among a_1[] = { - new Among ( "la", -1, -1, "", this), - new Among ( "sela", 0, -1, "", this), - new Among ( "le", -1, -1, "", this), - new Among ( "me", -1, -1, "", this), - new Among ( "se", -1, -1, "", this), - new Among ( "lo", -1, -1, "", this), - new Among ( "selo", 5, -1, "", this), - new Among ( "las", -1, -1, "", this), - new Among ( "selas", 7, -1, "", this), - new Among ( "les", -1, -1, "", this), - new Among ( "los", -1, -1, "", this), - new Among ( "selos", 10, -1, "", this), - new Among ( "nos", -1, -1, "", this) - }; - - private Among a_2[] = { - new Among ( "ando", -1, 6, "", this), - new Among ( "iendo", -1, 6, "", this), - new Among ( "yendo", -1, 7, "", this), - new Among ( "\u00E1ndo", -1, 2, "", this), - new Among ( "i\u00E9ndo", -1, 1, "", this), - new Among ( "ar", -1, 6, "", this), - new Among ( "er", -1, 6, "", this), - new Among ( "ir", -1, 6, "", this), - new Among ( "\u00E1r", -1, 3, "", this), - new Among ( "\u00E9r", -1, 4, "", this), - new Among ( "\u00EDr", -1, 5, "", this) - }; - - private Among a_3[] = { - new Among ( "ic", -1, -1, "", this), - new Among ( "ad", -1, -1, "", this), - new Among ( "os", -1, -1, "", this), - new Among ( "iv", -1, 1, "", this) - }; - - private Among a_4[] = { - new Among ( "able", -1, 1, "", this), - new Among ( "ible", -1, 1, "", this) - }; - - private Among a_5[] = { - new Among ( "ic", -1, 1, "", this), - new Among ( "abil", -1, 1, "", this), - new Among ( "iv", -1, 1, "", this) - }; - - private Among a_6[] = { - new Among ( "ica", -1, 1, "", this), - new Among ( "encia", -1, 5, "", this), - new Among ( "adora", -1, 2, "", this), - new Among ( "osa", -1, 1, "", this), - new Among ( "ista", -1, 1, "", this), - new Among ( "iva", -1, 9, "", this), - new Among ( "anza", -1, 1, "", this), - new Among ( "log\u00EDa", -1, 3, "", this), - new Among ( "idad", -1, 8, "", this), - new Among ( "able", -1, 1, "", this), - new Among ( "ible", -1, 1, "", this), - new Among ( "mente", -1, 7, "", this), - new Among ( "amente", 11, 6, "", this), - new Among ( "aci\u00F3n", -1, 2, "", this), - new Among ( "uci\u00F3n", -1, 4, "", this), - new Among ( "ico", -1, 1, "", this), - new Among ( "ismo", -1, 1, "", this), - new Among ( "oso", -1, 1, "", this), - new Among ( "amiento", -1, 1, "", this), - new Among ( "imiento", -1, 1, "", this), - new Among ( "ivo", -1, 9, "", this), - new Among ( "ador", -1, 2, "", this), - new Among ( "icas", -1, 1, "", this), - new Among ( "encias", -1, 5, "", this), - new Among ( "adoras", -1, 2, "", this), - new Among ( "osas", -1, 1, "", this), - new Among ( "istas", -1, 1, "", this), - new Among ( "ivas", -1, 9, "", this), - new Among ( "anzas", -1, 1, "", this), - new Among ( "log\u00EDas", -1, 3, "", this), - new Among ( "idades", -1, 8, "", this), - new Among ( "ables", -1, 1, "", this), - new Among ( "ibles", -1, 1, "", this), - new Among ( "aciones", -1, 2, "", this), - new Among ( "uciones", -1, 4, "", this), - new Among ( "adores", -1, 2, "", this), - new Among ( "icos", -1, 1, "", this), - new Among ( "ismos", -1, 1, "", this), - new Among ( "osos", -1, 1, "", this), - new Among ( "amientos", -1, 1, "", this), - new Among ( "imientos", -1, 1, "", this), - new Among ( "ivos", -1, 9, "", this) - }; - - private Among a_7[] = { - new Among ( "ya", -1, 1, "", this), - new Among ( "ye", -1, 1, "", this), - new Among ( "yan", -1, 1, "", this), - new Among ( "yen", -1, 1, "", this), - new Among ( "yeron", -1, 1, "", this), - new Among ( "yendo", -1, 1, "", this), - new Among ( "yo", -1, 1, "", this), - new Among ( "yas", -1, 1, "", this), - new Among ( "yes", -1, 1, "", this), - new Among ( "yais", -1, 1, "", this), - new Among ( "yamos", -1, 1, "", this), - new Among ( "y\u00F3", -1, 1, "", this) - }; - - private Among a_8[] = { - new Among ( "aba", -1, 2, "", this), - new Among ( "ada", -1, 2, "", this), - new Among ( "ida", -1, 2, "", this), - new Among ( "ara", -1, 2, "", this), - new Among ( "iera", -1, 2, "", this), - new Among ( "\u00EDa", -1, 2, "", this), - new Among ( "ar\u00EDa", 5, 2, "", this), - new Among ( "er\u00EDa", 5, 2, "", this), - new Among ( "ir\u00EDa", 5, 2, "", this), - new Among ( "ad", -1, 2, "", this), - new Among ( "ed", -1, 2, "", this), - new Among ( "id", -1, 2, "", this), - new Among ( "ase", -1, 2, "", this), - new Among ( "iese", -1, 2, "", this), - new Among ( "aste", -1, 2, "", this), - new Among ( "iste", -1, 2, "", this), - new Among ( "an", -1, 2, "", this), - new Among ( "aban", 16, 2, "", this), - new Among ( "aran", 16, 2, "", this), - new Among ( "ieran", 16, 2, "", this), - new Among ( "\u00EDan", 16, 2, "", this), - new Among ( "ar\u00EDan", 20, 2, "", this), - new Among ( "er\u00EDan", 20, 2, "", this), - new Among ( "ir\u00EDan", 20, 2, "", this), - new Among ( "en", -1, 1, "", this), - new Among ( "asen", 24, 2, "", this), - new Among ( "iesen", 24, 2, "", this), - new Among ( "aron", -1, 2, "", this), - new Among ( "ieron", -1, 2, "", this), - new Among ( "ar\u00E1n", -1, 2, "", this), - new Among ( "er\u00E1n", -1, 2, "", this), - new Among ( "ir\u00E1n", -1, 2, "", this), - new Among ( "ado", -1, 2, "", this), - new Among ( "ido", -1, 2, "", this), - new Among ( "ando", -1, 2, "", this), - new Among ( "iendo", -1, 2, "", this), - new Among ( "ar", -1, 2, "", this), - new Among ( "er", -1, 2, "", this), - new Among ( "ir", -1, 2, "", this), - new Among ( "as", -1, 2, "", this), - new Among ( "abas", 39, 2, "", this), - new Among ( "adas", 39, 2, "", this), - new Among ( "idas", 39, 2, "", this), - new Among ( "aras", 39, 2, "", this), - new Among ( "ieras", 39, 2, "", this), - new Among ( "\u00EDas", 39, 2, "", this), - new Among ( "ar\u00EDas", 45, 2, "", this), - new Among ( "er\u00EDas", 45, 2, "", this), - new Among ( "ir\u00EDas", 45, 2, "", this), - new Among ( "es", -1, 1, "", this), - new Among ( "ases", 49, 2, "", this), - new Among ( "ieses", 49, 2, "", this), - new Among ( "abais", -1, 2, "", this), - new Among ( "arais", -1, 2, "", this), - new Among ( "ierais", -1, 2, "", this), - new Among ( "\u00EDais", -1, 2, "", this), - new Among ( "ar\u00EDais", 55, 2, "", this), - new Among ( "er\u00EDais", 55, 2, "", this), - new Among ( "ir\u00EDais", 55, 2, "", this), - new Among ( "aseis", -1, 2, "", this), - new Among ( "ieseis", -1, 2, "", this), - new Among ( "asteis", -1, 2, "", this), - new Among ( "isteis", -1, 2, "", this), - new Among ( "\u00E1is", -1, 2, "", this), - new Among ( "\u00E9is", -1, 1, "", this), - new Among ( "ar\u00E9is", 64, 2, "", this), - new Among ( "er\u00E9is", 64, 2, "", this), - new Among ( "ir\u00E9is", 64, 2, "", this), - new Among ( "ados", -1, 2, "", this), - new Among ( "idos", -1, 2, "", this), - new Among ( "amos", -1, 2, "", this), - new Among ( "\u00E1bamos", 70, 2, "", this), - new Among ( "\u00E1ramos", 70, 2, "", this), - new Among ( "i\u00E9ramos", 70, 2, "", this), - new Among ( "\u00EDamos", 70, 2, "", this), - new Among ( "ar\u00EDamos", 74, 2, "", this), - new Among ( "er\u00EDamos", 74, 2, "", this), - new Among ( "ir\u00EDamos", 74, 2, "", this), - new Among ( "emos", -1, 1, "", this), - new Among ( "aremos", 78, 2, "", this), - new Among ( "eremos", 78, 2, "", this), - new Among ( "iremos", 78, 2, "", this), - new Among ( "\u00E1semos", 78, 2, "", this), - new Among ( "i\u00E9semos", 78, 2, "", this), - new Among ( "imos", -1, 2, "", this), - new Among ( "ar\u00E1s", -1, 2, "", this), - new Among ( "er\u00E1s", -1, 2, "", this), - new Among ( "ir\u00E1s", -1, 2, "", this), - new Among ( "\u00EDs", -1, 2, "", this), - new Among ( "ar\u00E1", -1, 2, "", this), - new Among ( "er\u00E1", -1, 2, "", this), - new Among ( "ir\u00E1", -1, 2, "", this), - new Among ( "ar\u00E9", -1, 2, "", this), - new Among ( "er\u00E9", -1, 2, "", this), - new Among ( "ir\u00E9", -1, 2, "", this), - new Among ( "i\u00F3", -1, 2, "", this) - }; - - private Among a_9[] = { - new Among ( "a", -1, 1, "", this), - new Among ( "e", -1, 2, "", this), - new Among ( "o", -1, 1, "", this), - new Among ( "os", -1, 1, "", this), - new Among ( "\u00E1", -1, 1, "", this), - new Among ( "\u00E9", -1, 2, "", this), - new Among ( "\u00ED", -1, 1, "", this), - new Among ( "\u00F3", -1, 1, "", this) - }; - - private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 10 }; - - private int I_p2; - private int I_p1; - private int I_pV; - - /* private void copy_from(SpanishStemmer other) { - I_p2 = other.I_p2; - I_p1 = other.I_p1; - I_pV = other.I_pV; - super.copy_from(other); - } -*/ - private boolean r_mark_regions() { - int v_1; - int v_2; - int v_3; - int v_6; - int v_8; - // (, line 31 - I_pV = limit; - I_p1 = limit; - I_p2 = limit; - // do, line 37 - v_1 = cursor; - lab0: do { - // (, line 37 - // or, line 39 - lab1: do { - v_2 = cursor; - lab2: do { - // (, line 38 - if (!(in_grouping(g_v, 97, 252))) - { - break lab2; - } - // or, line 38 - lab3: do { - v_3 = cursor; - lab4: do { - // (, line 38 - if (!(out_grouping(g_v, 97, 252))) - { - break lab4; - } - // gopast, line 38 - golab5: while(true) - { - lab6: do { - if (!(in_grouping(g_v, 97, 252))) - { - break lab6; - } - break golab5; - } while (false); - if (cursor >= limit) - { - break lab4; - } - cursor++; - } - break lab3; - } while (false); - cursor = v_3; - // (, line 38 - if (!(in_grouping(g_v, 97, 252))) - { - break lab2; - } - // gopast, line 38 - golab7: while(true) - { - lab8: do { - if (!(out_grouping(g_v, 97, 252))) - { - break lab8; - } - break golab7; - } while (false); - if (cursor >= limit) - { - break lab2; - } - cursor++; - } - } while (false); - break lab1; - } while (false); - cursor = v_2; - // (, line 40 - if (!(out_grouping(g_v, 97, 252))) - { - break lab0; - } - // or, line 40 - lab9: do { - v_6 = cursor; - lab10: do { - // (, line 40 - if (!(out_grouping(g_v, 97, 252))) - { - break lab10; - } - // gopast, line 40 - golab11: while(true) - { - lab12: do { - if (!(in_grouping(g_v, 97, 252))) - { - break lab12; - } - break golab11; - } while (false); - if (cursor >= limit) - { - break lab10; - } - cursor++; - } - break lab9; - } while (false); - cursor = v_6; - // (, line 40 - if (!(in_grouping(g_v, 97, 252))) - { - break lab0; - } - // next, line 40 - if (cursor >= limit) - { - break lab0; - } - cursor++; - } while (false); - } while (false); - // setmark pV, line 41 - I_pV = cursor; - } while (false); - cursor = v_1; - // do, line 43 - v_8 = cursor; - lab13: do { - // (, line 43 - // gopast, line 44 - golab14: while(true) - { - lab15: do { - if (!(in_grouping(g_v, 97, 252))) - { - break lab15; - } - break golab14; - } while (false); - if (cursor >= limit) - { - break lab13; - } - cursor++; - } - // gopast, line 44 - golab16: while(true) - { - lab17: do { - if (!(out_grouping(g_v, 97, 252))) - { - break lab17; - } - break golab16; - } while (false); - if (cursor >= limit) - { - break lab13; - } - cursor++; - } - // setmark p1, line 44 - I_p1 = cursor; - // gopast, line 45 - golab18: while(true) - { - lab19: do { - if (!(in_grouping(g_v, 97, 252))) - { - break lab19; - } - break golab18; - } while (false); - if (cursor >= limit) - { - break lab13; - } - cursor++; - } - // gopast, line 45 - golab20: while(true) - { - lab21: do { - if (!(out_grouping(g_v, 97, 252))) - { - break lab21; - } - break golab20; - } while (false); - if (cursor >= limit) - { - break lab13; - } - cursor++; - } - // setmark p2, line 45 - I_p2 = cursor; - } while (false); - cursor = v_8; - return true; - } - - private boolean r_postlude() { - int among_var; - int v_1; - // repeat, line 49 - replab0: while(true) - { - v_1 = cursor; - lab1: do { - // (, line 49 - // [, line 50 - bra = cursor; - // substring, line 50 - among_var = find_among(a_0, 6); - if (among_var == 0) - { - break lab1; - } - // ], line 50 - ket = cursor; - switch(among_var) { - case 0: - break lab1; - case 1: - // (, line 51 - // <-, line 51 - slice_from("a"); - break; - case 2: - // (, line 52 - // <-, line 52 - slice_from("e"); - break; - case 3: - // (, line 53 - // <-, line 53 - slice_from("i"); - break; - case 4: - // (, line 54 - // <-, line 54 - slice_from("o"); - break; - case 5: - // (, line 55 - // <-, line 55 - slice_from("u"); - break; - case 6: - // (, line 57 - // next, line 57 - if (cursor >= limit) - { - break lab1; - } - cursor++; - break; - } - continue replab0; - } while (false); - cursor = v_1; - break replab0; - } - return true; - } - - private boolean r_RV() { - if (!(I_pV <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R1() { - if (!(I_p1 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_attached_pronoun() { - int among_var; - // (, line 67 - // [, line 68 - ket = cursor; - // substring, line 68 - if (find_among_b(a_1, 13) == 0) - { - return false; - } - // ], line 68 - bra = cursor; - // substring, line 72 - among_var = find_among_b(a_2, 11); - if (among_var == 0) - { - return false; - } - // call RV, line 72 - if (!r_RV()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 73 - // ], line 73 - bra = cursor; - // <-, line 73 - slice_from("iendo"); - break; - case 2: - // (, line 74 - // ], line 74 - bra = cursor; - // <-, line 74 - slice_from("ando"); - break; - case 3: - // (, line 75 - // ], line 75 - bra = cursor; - // <-, line 75 - slice_from("ar"); - break; - case 4: - // (, line 76 - // ], line 76 - bra = cursor; - // <-, line 76 - slice_from("er"); - break; - case 5: - // (, line 77 - // ], line 77 - bra = cursor; - // <-, line 77 - slice_from("ir"); - break; - case 6: - // (, line 81 - // delete, line 81 - slice_del(); - break; - case 7: - // (, line 82 - // literal, line 82 - if (!(eq_s_b(1, "u"))) - { - return false; - } - // delete, line 82 - slice_del(); - break; - } - return true; - } - - private boolean r_standard_suffix() { - int among_var; - int v_1; - int v_2; - int v_3; - int v_4; - int v_5; - // (, line 86 - // [, line 87 - ket = cursor; - // substring, line 87 - among_var = find_among_b(a_6, 42); - if (among_var == 0) - { - return false; - } - // ], line 87 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 98 - // call R2, line 99 - if (!r_R2()) - { - return false; - } - // delete, line 99 - slice_del(); - break; - case 2: - // (, line 103 - // call R2, line 104 - if (!r_R2()) - { - return false; - } - // delete, line 104 - slice_del(); - // try, line 105 - v_1 = limit - cursor; - lab0: do { - // (, line 105 - // [, line 105 - ket = cursor; - // literal, line 105 - if (!(eq_s_b(2, "ic"))) - { - cursor = limit - v_1; - break lab0; - } - // ], line 105 - bra = cursor; - // call R2, line 105 - if (!r_R2()) - { - cursor = limit - v_1; - break lab0; - } - // delete, line 105 - slice_del(); - } while (false); - break; - case 3: - // (, line 109 - // call R2, line 110 - if (!r_R2()) - { - return false; - } - // <-, line 110 - slice_from("log"); - break; - case 4: - // (, line 113 - // call R2, line 114 - if (!r_R2()) - { - return false; - } - // <-, line 114 - slice_from("u"); - break; - case 5: - // (, line 117 - // call R2, line 118 - if (!r_R2()) - { - return false; - } - // <-, line 118 - slice_from("ente"); - break; - case 6: - // (, line 121 - // call R1, line 122 - if (!r_R1()) - { - return false; - } - // delete, line 122 - slice_del(); - // try, line 123 - v_2 = limit - cursor; - lab1: do { - // (, line 123 - // [, line 124 - ket = cursor; - // substring, line 124 - among_var = find_among_b(a_3, 4); - if (among_var == 0) - { - cursor = limit - v_2; - break lab1; - } - // ], line 124 - bra = cursor; - // call R2, line 124 - if (!r_R2()) - { - cursor = limit - v_2; - break lab1; - } - // delete, line 124 - slice_del(); - switch(among_var) { - case 0: - cursor = limit - v_2; - break lab1; - case 1: - // (, line 125 - // [, line 125 - ket = cursor; - // literal, line 125 - if (!(eq_s_b(2, "at"))) - { - cursor = limit - v_2; - break lab1; - } - // ], line 125 - bra = cursor; - // call R2, line 125 - if (!r_R2()) - { - cursor = limit - v_2; - break lab1; - } - // delete, line 125 - slice_del(); - break; - } - } while (false); - break; - case 7: - // (, line 133 - // call R2, line 134 - if (!r_R2()) - { - return false; - } - // delete, line 134 - slice_del(); - // try, line 135 - v_3 = limit - cursor; - lab2: do { - // (, line 135 - // [, line 136 - ket = cursor; - // substring, line 136 - among_var = find_among_b(a_4, 2); - if (among_var == 0) - { - cursor = limit - v_3; - break lab2; - } - // ], line 136 - bra = cursor; - switch(among_var) { - case 0: - cursor = limit - v_3; - break lab2; - case 1: - // (, line 138 - // call R2, line 138 - if (!r_R2()) - { - cursor = limit - v_3; - break lab2; - } - // delete, line 138 - slice_del(); - break; - } - } while (false); - break; - case 8: - // (, line 144 - // call R2, line 145 - if (!r_R2()) - { - return false; - } - // delete, line 145 - slice_del(); - // try, line 146 - v_4 = limit - cursor; - lab3: do { - // (, line 146 - // [, line 147 - ket = cursor; - // substring, line 147 - among_var = find_among_b(a_5, 3); - if (among_var == 0) - { - cursor = limit - v_4; - break lab3; - } - // ], line 147 - bra = cursor; - switch(among_var) { - case 0: - cursor = limit - v_4; - break lab3; - case 1: - // (, line 150 - // call R2, line 150 - if (!r_R2()) - { - cursor = limit - v_4; - break lab3; - } - // delete, line 150 - slice_del(); - break; - } - } while (false); - break; - case 9: - // (, line 156 - // call R2, line 157 - if (!r_R2()) - { - return false; - } - // delete, line 157 - slice_del(); - // try, line 158 - v_5 = limit - cursor; - lab4: do { - // (, line 158 - // [, line 159 - ket = cursor; - // literal, line 159 - if (!(eq_s_b(2, "at"))) - { - cursor = limit - v_5; - break lab4; - } - // ], line 159 - bra = cursor; - // call R2, line 159 - if (!r_R2()) - { - cursor = limit - v_5; - break lab4; - } - // delete, line 159 - slice_del(); - } while (false); - break; - } - return true; - } - - private boolean r_y_verb_suffix() { - int among_var; - int v_1; - int v_2; - // (, line 165 - // setlimit, line 166 - v_1 = limit - cursor; - // tomark, line 166 - if (cursor < I_pV) - { - return false; - } - cursor = I_pV; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 166 - // [, line 166 - ket = cursor; - // substring, line 166 - among_var = find_among_b(a_7, 12); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 166 - bra = cursor; - limit_backward = v_2; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 169 - // literal, line 169 - if (!(eq_s_b(1, "u"))) - { - return false; - } - // delete, line 169 - slice_del(); - break; - } - return true; - } - - private boolean r_verb_suffix() { - int among_var; - int v_1; - int v_2; - int v_3; - int v_4; - // (, line 173 - // setlimit, line 174 - v_1 = limit - cursor; - // tomark, line 174 - if (cursor < I_pV) - { - return false; - } - cursor = I_pV; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 174 - // [, line 174 - ket = cursor; - // substring, line 174 - among_var = find_among_b(a_8, 96); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 174 - bra = cursor; - limit_backward = v_2; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 177 - // try, line 177 - v_3 = limit - cursor; - lab0: do { - // (, line 177 - // literal, line 177 - if (!(eq_s_b(1, "u"))) - { - cursor = limit - v_3; - break lab0; - } - // test, line 177 - v_4 = limit - cursor; - // literal, line 177 - if (!(eq_s_b(1, "g"))) - { - cursor = limit - v_3; - break lab0; - } - cursor = limit - v_4; - } while (false); - // ], line 177 - bra = cursor; - // delete, line 177 - slice_del(); - break; - case 2: - // (, line 198 - // delete, line 198 - slice_del(); - break; - } - return true; - } - - private boolean r_residual_suffix() { - int among_var; - int v_1; - int v_2; - // (, line 202 - // [, line 203 - ket = cursor; - // substring, line 203 - among_var = find_among_b(a_9, 8); - if (among_var == 0) - { - return false; - } - // ], line 203 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 206 - // call RV, line 206 - if (!r_RV()) - { - return false; - } - // delete, line 206 - slice_del(); - break; - case 2: - // (, line 208 - // call RV, line 208 - if (!r_RV()) - { - return false; - } - // delete, line 208 - slice_del(); - // try, line 208 - v_1 = limit - cursor; - lab0: do { - // (, line 208 - // [, line 208 - ket = cursor; - // literal, line 208 - if (!(eq_s_b(1, "u"))) - { - cursor = limit - v_1; - break lab0; - } - // ], line 208 - bra = cursor; - // test, line 208 - v_2 = limit - cursor; - // literal, line 208 - if (!(eq_s_b(1, "g"))) - { - cursor = limit - v_1; - break lab0; - } - cursor = limit - v_2; - // call RV, line 208 - if (!r_RV()) - { - cursor = limit - v_1; - break lab0; - } - // delete, line 208 - slice_del(); - } while (false); - break; - } - return true; - } - - public boolean stem() { - int v_1; - int v_2; - int v_3; - int v_4; - int v_5; - int v_6; - // (, line 213 - // do, line 214 - v_1 = cursor; - lab0: do { - // call mark_regions, line 214 - if (!r_mark_regions()) - { - break lab0; - } - } while (false); - cursor = v_1; - // backwards, line 215 - limit_backward = cursor; cursor = limit; - // (, line 215 - // do, line 216 - v_2 = limit - cursor; - lab1: do { - // call attached_pronoun, line 216 - if (!r_attached_pronoun()) - { - break lab1; - } - } while (false); - cursor = limit - v_2; - // do, line 217 - v_3 = limit - cursor; - lab2: do { - // (, line 217 - // or, line 217 - lab3: do { - v_4 = limit - cursor; - lab4: do { - // call standard_suffix, line 217 - if (!r_standard_suffix()) - { - break lab4; - } - break lab3; - } while (false); - cursor = limit - v_4; - lab5: do { - // call y_verb_suffix, line 218 - if (!r_y_verb_suffix()) - { - break lab5; - } - break lab3; - } while (false); - cursor = limit - v_4; - // call verb_suffix, line 219 - if (!r_verb_suffix()) - { - break lab2; - } - } while (false); - } while (false); - cursor = limit - v_3; - // do, line 221 - v_5 = limit - cursor; - lab6: do { - // call residual_suffix, line 221 - if (!r_residual_suffix()) - { - break lab6; - } - } while (false); - cursor = limit - v_5; - cursor = limit_backward; // do, line 223 - v_6 = cursor; - lab7: do { - // call postlude, line 223 - if (!r_postlude()) - { - break lab7; - } - } while (false); - cursor = v_6; - return true; - } - -} - +package kea.stemmers; + +import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; + +// This file was generated automatically by the Snowball to Java compiler + +/** + * Generated class implementing code defined by a snowball script. + */ +public class SpanishStemmerSB extends SnowballProgram { + + private static final long serialVersionUID = 1L; + + private Among a_0[] = { + new Among ( "", -1, 6, "", this), + new Among ( "\u00E1", 0, 1, "", this), + new Among ( "\u00E9", 0, 2, "", this), + new Among ( "\u00ED", 0, 3, "", this), + new Among ( "\u00F3", 0, 4, "", this), + new Among ( "\u00FA", 0, 5, "", this) + }; + + private Among a_1[] = { + new Among ( "la", -1, -1, "", this), + new Among ( "sela", 0, -1, "", this), + new Among ( "le", -1, -1, "", this), + new Among ( "me", -1, -1, "", this), + new Among ( "se", -1, -1, "", this), + new Among ( "lo", -1, -1, "", this), + new Among ( "selo", 5, -1, "", this), + new Among ( "las", -1, -1, "", this), + new Among ( "selas", 7, -1, "", this), + new Among ( "les", -1, -1, "", this), + new Among ( "los", -1, -1, "", this), + new Among ( "selos", 10, -1, "", this), + new Among ( "nos", -1, -1, "", this) + }; + + private Among a_2[] = { + new Among ( "ando", -1, 6, "", this), + new Among ( "iendo", -1, 6, "", this), + new Among ( "yendo", -1, 7, "", this), + new Among ( "\u00E1ndo", -1, 2, "", this), + new Among ( "i\u00E9ndo", -1, 1, "", this), + new Among ( "ar", -1, 6, "", this), + new Among ( "er", -1, 6, "", this), + new Among ( "ir", -1, 6, "", this), + new Among ( "\u00E1r", -1, 3, "", this), + new Among ( "\u00E9r", -1, 4, "", this), + new Among ( "\u00EDr", -1, 5, "", this) + }; + + private Among a_3[] = { + new Among ( "ic", -1, -1, "", this), + new Among ( "ad", -1, -1, "", this), + new Among ( "os", -1, -1, "", this), + new Among ( "iv", -1, 1, "", this) + }; + + private Among a_4[] = { + new Among ( "able", -1, 1, "", this), + new Among ( "ible", -1, 1, "", this) + }; + + private Among a_5[] = { + new Among ( "ic", -1, 1, "", this), + new Among ( "abil", -1, 1, "", this), + new Among ( "iv", -1, 1, "", this) + }; + + private Among a_6[] = { + new Among ( "ica", -1, 1, "", this), + new Among ( "encia", -1, 5, "", this), + new Among ( "adora", -1, 2, "", this), + new Among ( "osa", -1, 1, "", this), + new Among ( "ista", -1, 1, "", this), + new Among ( "iva", -1, 9, "", this), + new Among ( "anza", -1, 1, "", this), + new Among ( "log\u00EDa", -1, 3, "", this), + new Among ( "idad", -1, 8, "", this), + new Among ( "able", -1, 1, "", this), + new Among ( "ible", -1, 1, "", this), + new Among ( "mente", -1, 7, "", this), + new Among ( "amente", 11, 6, "", this), + new Among ( "aci\u00F3n", -1, 2, "", this), + new Among ( "uci\u00F3n", -1, 4, "", this), + new Among ( "ico", -1, 1, "", this), + new Among ( "ismo", -1, 1, "", this), + new Among ( "oso", -1, 1, "", this), + new Among ( "amiento", -1, 1, "", this), + new Among ( "imiento", -1, 1, "", this), + new Among ( "ivo", -1, 9, "", this), + new Among ( "ador", -1, 2, "", this), + new Among ( "icas", -1, 1, "", this), + new Among ( "encias", -1, 5, "", this), + new Among ( "adoras", -1, 2, "", this), + new Among ( "osas", -1, 1, "", this), + new Among ( "istas", -1, 1, "", this), + new Among ( "ivas", -1, 9, "", this), + new Among ( "anzas", -1, 1, "", this), + new Among ( "log\u00EDas", -1, 3, "", this), + new Among ( "idades", -1, 8, "", this), + new Among ( "ables", -1, 1, "", this), + new Among ( "ibles", -1, 1, "", this), + new Among ( "aciones", -1, 2, "", this), + new Among ( "uciones", -1, 4, "", this), + new Among ( "adores", -1, 2, "", this), + new Among ( "icos", -1, 1, "", this), + new Among ( "ismos", -1, 1, "", this), + new Among ( "osos", -1, 1, "", this), + new Among ( "amientos", -1, 1, "", this), + new Among ( "imientos", -1, 1, "", this), + new Among ( "ivos", -1, 9, "", this) + }; + + private Among a_7[] = { + new Among ( "ya", -1, 1, "", this), + new Among ( "ye", -1, 1, "", this), + new Among ( "yan", -1, 1, "", this), + new Among ( "yen", -1, 1, "", this), + new Among ( "yeron", -1, 1, "", this), + new Among ( "yendo", -1, 1, "", this), + new Among ( "yo", -1, 1, "", this), + new Among ( "yas", -1, 1, "", this), + new Among ( "yes", -1, 1, "", this), + new Among ( "yais", -1, 1, "", this), + new Among ( "yamos", -1, 1, "", this), + new Among ( "y\u00F3", -1, 1, "", this) + }; + + private Among a_8[] = { + new Among ( "aba", -1, 2, "", this), + new Among ( "ada", -1, 2, "", this), + new Among ( "ida", -1, 2, "", this), + new Among ( "ara", -1, 2, "", this), + new Among ( "iera", -1, 2, "", this), + new Among ( "\u00EDa", -1, 2, "", this), + new Among ( "ar\u00EDa", 5, 2, "", this), + new Among ( "er\u00EDa", 5, 2, "", this), + new Among ( "ir\u00EDa", 5, 2, "", this), + new Among ( "ad", -1, 2, "", this), + new Among ( "ed", -1, 2, "", this), + new Among ( "id", -1, 2, "", this), + new Among ( "ase", -1, 2, "", this), + new Among ( "iese", -1, 2, "", this), + new Among ( "aste", -1, 2, "", this), + new Among ( "iste", -1, 2, "", this), + new Among ( "an", -1, 2, "", this), + new Among ( "aban", 16, 2, "", this), + new Among ( "aran", 16, 2, "", this), + new Among ( "ieran", 16, 2, "", this), + new Among ( "\u00EDan", 16, 2, "", this), + new Among ( "ar\u00EDan", 20, 2, "", this), + new Among ( "er\u00EDan", 20, 2, "", this), + new Among ( "ir\u00EDan", 20, 2, "", this), + new Among ( "en", -1, 1, "", this), + new Among ( "asen", 24, 2, "", this), + new Among ( "iesen", 24, 2, "", this), + new Among ( "aron", -1, 2, "", this), + new Among ( "ieron", -1, 2, "", this), + new Among ( "ar\u00E1n", -1, 2, "", this), + new Among ( "er\u00E1n", -1, 2, "", this), + new Among ( "ir\u00E1n", -1, 2, "", this), + new Among ( "ado", -1, 2, "", this), + new Among ( "ido", -1, 2, "", this), + new Among ( "ando", -1, 2, "", this), + new Among ( "iendo", -1, 2, "", this), + new Among ( "ar", -1, 2, "", this), + new Among ( "er", -1, 2, "", this), + new Among ( "ir", -1, 2, "", this), + new Among ( "as", -1, 2, "", this), + new Among ( "abas", 39, 2, "", this), + new Among ( "adas", 39, 2, "", this), + new Among ( "idas", 39, 2, "", this), + new Among ( "aras", 39, 2, "", this), + new Among ( "ieras", 39, 2, "", this), + new Among ( "\u00EDas", 39, 2, "", this), + new Among ( "ar\u00EDas", 45, 2, "", this), + new Among ( "er\u00EDas", 45, 2, "", this), + new Among ( "ir\u00EDas", 45, 2, "", this), + new Among ( "es", -1, 1, "", this), + new Among ( "ases", 49, 2, "", this), + new Among ( "ieses", 49, 2, "", this), + new Among ( "abais", -1, 2, "", this), + new Among ( "arais", -1, 2, "", this), + new Among ( "ierais", -1, 2, "", this), + new Among ( "\u00EDais", -1, 2, "", this), + new Among ( "ar\u00EDais", 55, 2, "", this), + new Among ( "er\u00EDais", 55, 2, "", this), + new Among ( "ir\u00EDais", 55, 2, "", this), + new Among ( "aseis", -1, 2, "", this), + new Among ( "ieseis", -1, 2, "", this), + new Among ( "asteis", -1, 2, "", this), + new Among ( "isteis", -1, 2, "", this), + new Among ( "\u00E1is", -1, 2, "", this), + new Among ( "\u00E9is", -1, 1, "", this), + new Among ( "ar\u00E9is", 64, 2, "", this), + new Among ( "er\u00E9is", 64, 2, "", this), + new Among ( "ir\u00E9is", 64, 2, "", this), + new Among ( "ados", -1, 2, "", this), + new Among ( "idos", -1, 2, "", this), + new Among ( "amos", -1, 2, "", this), + new Among ( "\u00E1bamos", 70, 2, "", this), + new Among ( "\u00E1ramos", 70, 2, "", this), + new Among ( "i\u00E9ramos", 70, 2, "", this), + new Among ( "\u00EDamos", 70, 2, "", this), + new Among ( "ar\u00EDamos", 74, 2, "", this), + new Among ( "er\u00EDamos", 74, 2, "", this), + new Among ( "ir\u00EDamos", 74, 2, "", this), + new Among ( "emos", -1, 1, "", this), + new Among ( "aremos", 78, 2, "", this), + new Among ( "eremos", 78, 2, "", this), + new Among ( "iremos", 78, 2, "", this), + new Among ( "\u00E1semos", 78, 2, "", this), + new Among ( "i\u00E9semos", 78, 2, "", this), + new Among ( "imos", -1, 2, "", this), + new Among ( "ar\u00E1s", -1, 2, "", this), + new Among ( "er\u00E1s", -1, 2, "", this), + new Among ( "ir\u00E1s", -1, 2, "", this), + new Among ( "\u00EDs", -1, 2, "", this), + new Among ( "ar\u00E1", -1, 2, "", this), + new Among ( "er\u00E1", -1, 2, "", this), + new Among ( "ir\u00E1", -1, 2, "", this), + new Among ( "ar\u00E9", -1, 2, "", this), + new Among ( "er\u00E9", -1, 2, "", this), + new Among ( "ir\u00E9", -1, 2, "", this), + new Among ( "i\u00F3", -1, 2, "", this) + }; + + private Among a_9[] = { + new Among ( "a", -1, 1, "", this), + new Among ( "e", -1, 2, "", this), + new Among ( "o", -1, 1, "", this), + new Among ( "os", -1, 1, "", this), + new Among ( "\u00E1", -1, 1, "", this), + new Among ( "\u00E9", -1, 2, "", this), + new Among ( "\u00ED", -1, 1, "", this), + new Among ( "\u00F3", -1, 1, "", this) + }; + + private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 10 }; + + private int I_p2; + private int I_p1; + private int I_pV; + + /* private void copy_from(SpanishStemmer other) { + I_p2 = other.I_p2; + I_p1 = other.I_p1; + I_pV = other.I_pV; + super.copy_from(other); + } +*/ + private boolean r_mark_regions() { + int v_1; + int v_2; + int v_3; + int v_6; + int v_8; + // (, line 31 + I_pV = limit; + I_p1 = limit; + I_p2 = limit; + // do, line 37 + v_1 = cursor; + lab0: do { + // (, line 37 + // or, line 39 + lab1: do { + v_2 = cursor; + lab2: do { + // (, line 38 + if (!(in_grouping(g_v, 97, 252))) + { + break lab2; + } + // or, line 38 + lab3: do { + v_3 = cursor; + lab4: do { + // (, line 38 + if (!(out_grouping(g_v, 97, 252))) + { + break lab4; + } + // gopast, line 38 + golab5: while(true) + { + lab6: do { + if (!(in_grouping(g_v, 97, 252))) + { + break lab6; + } + break golab5; + } while (false); + if (cursor >= limit) + { + break lab4; + } + cursor++; + } + break lab3; + } while (false); + cursor = v_3; + // (, line 38 + if (!(in_grouping(g_v, 97, 252))) + { + break lab2; + } + // gopast, line 38 + golab7: while(true) + { + lab8: do { + if (!(out_grouping(g_v, 97, 252))) + { + break lab8; + } + break golab7; + } while (false); + if (cursor >= limit) + { + break lab2; + } + cursor++; + } + } while (false); + break lab1; + } while (false); + cursor = v_2; + // (, line 40 + if (!(out_grouping(g_v, 97, 252))) + { + break lab0; + } + // or, line 40 + lab9: do { + v_6 = cursor; + lab10: do { + // (, line 40 + if (!(out_grouping(g_v, 97, 252))) + { + break lab10; + } + // gopast, line 40 + golab11: while(true) + { + lab12: do { + if (!(in_grouping(g_v, 97, 252))) + { + break lab12; + } + break golab11; + } while (false); + if (cursor >= limit) + { + break lab10; + } + cursor++; + } + break lab9; + } while (false); + cursor = v_6; + // (, line 40 + if (!(in_grouping(g_v, 97, 252))) + { + break lab0; + } + // next, line 40 + if (cursor >= limit) + { + break lab0; + } + cursor++; + } while (false); + } while (false); + // setmark pV, line 41 + I_pV = cursor; + } while (false); + cursor = v_1; + // do, line 43 + v_8 = cursor; + lab13: do { + // (, line 43 + // gopast, line 44 + golab14: while(true) + { + lab15: do { + if (!(in_grouping(g_v, 97, 252))) + { + break lab15; + } + break golab14; + } while (false); + if (cursor >= limit) + { + break lab13; + } + cursor++; + } + // gopast, line 44 + golab16: while(true) + { + lab17: do { + if (!(out_grouping(g_v, 97, 252))) + { + break lab17; + } + break golab16; + } while (false); + if (cursor >= limit) + { + break lab13; + } + cursor++; + } + // setmark p1, line 44 + I_p1 = cursor; + // gopast, line 45 + golab18: while(true) + { + lab19: do { + if (!(in_grouping(g_v, 97, 252))) + { + break lab19; + } + break golab18; + } while (false); + if (cursor >= limit) + { + break lab13; + } + cursor++; + } + // gopast, line 45 + golab20: while(true) + { + lab21: do { + if (!(out_grouping(g_v, 97, 252))) + { + break lab21; + } + break golab20; + } while (false); + if (cursor >= limit) + { + break lab13; + } + cursor++; + } + // setmark p2, line 45 + I_p2 = cursor; + } while (false); + cursor = v_8; + return true; + } + + private boolean r_postlude() { + int among_var; + int v_1; + // repeat, line 49 + replab0: while(true) + { + v_1 = cursor; + lab1: do { + // (, line 49 + // [, line 50 + bra = cursor; + // substring, line 50 + among_var = find_among(a_0, 6); + if (among_var == 0) + { + break lab1; + } + // ], line 50 + ket = cursor; + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 51 + // <-, line 51 + slice_from("a"); + break; + case 2: + // (, line 52 + // <-, line 52 + slice_from("e"); + break; + case 3: + // (, line 53 + // <-, line 53 + slice_from("i"); + break; + case 4: + // (, line 54 + // <-, line 54 + slice_from("o"); + break; + case 5: + // (, line 55 + // <-, line 55 + slice_from("u"); + break; + case 6: + // (, line 57 + // next, line 57 + if (cursor >= limit) + { + break lab1; + } + cursor++; + break; + } + continue replab0; + } while (false); + cursor = v_1; + break replab0; + } + return true; + } + + private boolean r_RV() { + if (!(I_pV <= cursor)) + { + return false; + } + return true; + } + + private boolean r_R1() { + if (!(I_p1 <= cursor)) + { + return false; + } + return true; + } + + private boolean r_R2() { + if (!(I_p2 <= cursor)) + { + return false; + } + return true; + } + + private boolean r_attached_pronoun() { + int among_var; + // (, line 67 + // [, line 68 + ket = cursor; + // substring, line 68 + if (find_among_b(a_1, 13) == 0) + { + return false; + } + // ], line 68 + bra = cursor; + // substring, line 72 + among_var = find_among_b(a_2, 11); + if (among_var == 0) + { + return false; + } + // call RV, line 72 + if (!r_RV()) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 73 + // ], line 73 + bra = cursor; + // <-, line 73 + slice_from("iendo"); + break; + case 2: + // (, line 74 + // ], line 74 + bra = cursor; + // <-, line 74 + slice_from("ando"); + break; + case 3: + // (, line 75 + // ], line 75 + bra = cursor; + // <-, line 75 + slice_from("ar"); + break; + case 4: + // (, line 76 + // ], line 76 + bra = cursor; + // <-, line 76 + slice_from("er"); + break; + case 5: + // (, line 77 + // ], line 77 + bra = cursor; + // <-, line 77 + slice_from("ir"); + break; + case 6: + // (, line 81 + // delete, line 81 + slice_del(); + break; + case 7: + // (, line 82 + // literal, line 82 + if (!(eq_s_b(1, "u"))) + { + return false; + } + // delete, line 82 + slice_del(); + break; + } + return true; + } + + private boolean r_standard_suffix() { + int among_var; + int v_1; + int v_2; + int v_3; + int v_4; + int v_5; + // (, line 86 + // [, line 87 + ket = cursor; + // substring, line 87 + among_var = find_among_b(a_6, 42); + if (among_var == 0) + { + return false; + } + // ], line 87 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 98 + // call R2, line 99 + if (!r_R2()) + { + return false; + } + // delete, line 99 + slice_del(); + break; + case 2: + // (, line 103 + // call R2, line 104 + if (!r_R2()) + { + return false; + } + // delete, line 104 + slice_del(); + // try, line 105 + v_1 = limit - cursor; + lab0: do { + // (, line 105 + // [, line 105 + ket = cursor; + // literal, line 105 + if (!(eq_s_b(2, "ic"))) + { + cursor = limit - v_1; + break lab0; + } + // ], line 105 + bra = cursor; + // call R2, line 105 + if (!r_R2()) + { + cursor = limit - v_1; + break lab0; + } + // delete, line 105 + slice_del(); + } while (false); + break; + case 3: + // (, line 109 + // call R2, line 110 + if (!r_R2()) + { + return false; + } + // <-, line 110 + slice_from("log"); + break; + case 4: + // (, line 113 + // call R2, line 114 + if (!r_R2()) + { + return false; + } + // <-, line 114 + slice_from("u"); + break; + case 5: + // (, line 117 + // call R2, line 118 + if (!r_R2()) + { + return false; + } + // <-, line 118 + slice_from("ente"); + break; + case 6: + // (, line 121 + // call R1, line 122 + if (!r_R1()) + { + return false; + } + // delete, line 122 + slice_del(); + // try, line 123 + v_2 = limit - cursor; + lab1: do { + // (, line 123 + // [, line 124 + ket = cursor; + // substring, line 124 + among_var = find_among_b(a_3, 4); + if (among_var == 0) + { + cursor = limit - v_2; + break lab1; + } + // ], line 124 + bra = cursor; + // call R2, line 124 + if (!r_R2()) + { + cursor = limit - v_2; + break lab1; + } + // delete, line 124 + slice_del(); + switch(among_var) { + case 0: + cursor = limit - v_2; + break lab1; + case 1: + // (, line 125 + // [, line 125 + ket = cursor; + // literal, line 125 + if (!(eq_s_b(2, "at"))) + { + cursor = limit - v_2; + break lab1; + } + // ], line 125 + bra = cursor; + // call R2, line 125 + if (!r_R2()) + { + cursor = limit - v_2; + break lab1; + } + // delete, line 125 + slice_del(); + break; + } + } while (false); + break; + case 7: + // (, line 133 + // call R2, line 134 + if (!r_R2()) + { + return false; + } + // delete, line 134 + slice_del(); + // try, line 135 + v_3 = limit - cursor; + lab2: do { + // (, line 135 + // [, line 136 + ket = cursor; + // substring, line 136 + among_var = find_among_b(a_4, 2); + if (among_var == 0) + { + cursor = limit - v_3; + break lab2; + } + // ], line 136 + bra = cursor; + switch(among_var) { + case 0: + cursor = limit - v_3; + break lab2; + case 1: + // (, line 138 + // call R2, line 138 + if (!r_R2()) + { + cursor = limit - v_3; + break lab2; + } + // delete, line 138 + slice_del(); + break; + } + } while (false); + break; + case 8: + // (, line 144 + // call R2, line 145 + if (!r_R2()) + { + return false; + } + // delete, line 145 + slice_del(); + // try, line 146 + v_4 = limit - cursor; + lab3: do { + // (, line 146 + // [, line 147 + ket = cursor; + // substring, line 147 + among_var = find_among_b(a_5, 3); + if (among_var == 0) + { + cursor = limit - v_4; + break lab3; + } + // ], line 147 + bra = cursor; + switch(among_var) { + case 0: + cursor = limit - v_4; + break lab3; + case 1: + // (, line 150 + // call R2, line 150 + if (!r_R2()) + { + cursor = limit - v_4; + break lab3; + } + // delete, line 150 + slice_del(); + break; + } + } while (false); + break; + case 9: + // (, line 156 + // call R2, line 157 + if (!r_R2()) + { + return false; + } + // delete, line 157 + slice_del(); + // try, line 158 + v_5 = limit - cursor; + lab4: do { + // (, line 158 + // [, line 159 + ket = cursor; + // literal, line 159 + if (!(eq_s_b(2, "at"))) + { + cursor = limit - v_5; + break lab4; + } + // ], line 159 + bra = cursor; + // call R2, line 159 + if (!r_R2()) + { + cursor = limit - v_5; + break lab4; + } + // delete, line 159 + slice_del(); + } while (false); + break; + } + return true; + } + + private boolean r_y_verb_suffix() { + int among_var; + int v_1; + int v_2; + // (, line 165 + // setlimit, line 166 + v_1 = limit - cursor; + // tomark, line 166 + if (cursor < I_pV) + { + return false; + } + cursor = I_pV; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 166 + // [, line 166 + ket = cursor; + // substring, line 166 + among_var = find_among_b(a_7, 12); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 166 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 169 + // literal, line 169 + if (!(eq_s_b(1, "u"))) + { + return false; + } + // delete, line 169 + slice_del(); + break; + } + return true; + } + + private boolean r_verb_suffix() { + int among_var; + int v_1; + int v_2; + int v_3; + int v_4; + // (, line 173 + // setlimit, line 174 + v_1 = limit - cursor; + // tomark, line 174 + if (cursor < I_pV) + { + return false; + } + cursor = I_pV; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 174 + // [, line 174 + ket = cursor; + // substring, line 174 + among_var = find_among_b(a_8, 96); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 174 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 177 + // try, line 177 + v_3 = limit - cursor; + lab0: do { + // (, line 177 + // literal, line 177 + if (!(eq_s_b(1, "u"))) + { + cursor = limit - v_3; + break lab0; + } + // test, line 177 + v_4 = limit - cursor; + // literal, line 177 + if (!(eq_s_b(1, "g"))) + { + cursor = limit - v_3; + break lab0; + } + cursor = limit - v_4; + } while (false); + // ], line 177 + bra = cursor; + // delete, line 177 + slice_del(); + break; + case 2: + // (, line 198 + // delete, line 198 + slice_del(); + break; + } + return true; + } + + private boolean r_residual_suffix() { + int among_var; + int v_1; + int v_2; + // (, line 202 + // [, line 203 + ket = cursor; + // substring, line 203 + among_var = find_among_b(a_9, 8); + if (among_var == 0) + { + return false; + } + // ], line 203 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 206 + // call RV, line 206 + if (!r_RV()) + { + return false; + } + // delete, line 206 + slice_del(); + break; + case 2: + // (, line 208 + // call RV, line 208 + if (!r_RV()) + { + return false; + } + // delete, line 208 + slice_del(); + // try, line 208 + v_1 = limit - cursor; + lab0: do { + // (, line 208 + // [, line 208 + ket = cursor; + // literal, line 208 + if (!(eq_s_b(1, "u"))) + { + cursor = limit - v_1; + break lab0; + } + // ], line 208 + bra = cursor; + // test, line 208 + v_2 = limit - cursor; + // literal, line 208 + if (!(eq_s_b(1, "g"))) + { + cursor = limit - v_1; + break lab0; + } + cursor = limit - v_2; + // call RV, line 208 + if (!r_RV()) + { + cursor = limit - v_1; + break lab0; + } + // delete, line 208 + slice_del(); + } while (false); + break; + } + return true; + } + + public boolean stem() { + int v_1; + int v_2; + int v_3; + int v_4; + int v_5; + int v_6; + // (, line 213 + // do, line 214 + v_1 = cursor; + lab0: do { + // call mark_regions, line 214 + if (!r_mark_regions()) + { + break lab0; + } + } while (false); + cursor = v_1; + // backwards, line 215 + limit_backward = cursor; cursor = limit; + // (, line 215 + // do, line 216 + v_2 = limit - cursor; + lab1: do { + // call attached_pronoun, line 216 + if (!r_attached_pronoun()) + { + break lab1; + } + } while (false); + cursor = limit - v_2; + // do, line 217 + v_3 = limit - cursor; + lab2: do { + // (, line 217 + // or, line 217 + lab3: do { + v_4 = limit - cursor; + lab4: do { + // call standard_suffix, line 217 + if (!r_standard_suffix()) + { + break lab4; + } + break lab3; + } while (false); + cursor = limit - v_4; + lab5: do { + // call y_verb_suffix, line 218 + if (!r_y_verb_suffix()) + { + break lab5; + } + break lab3; + } while (false); + cursor = limit - v_4; + // call verb_suffix, line 219 + if (!r_verb_suffix()) + { + break lab2; + } + } while (false); + } while (false); + cursor = limit - v_3; + // do, line 221 + v_5 = limit - cursor; + lab6: do { + // call residual_suffix, line 221 + if (!r_residual_suffix()) + { + break lab6; + } + } while (false); + cursor = limit - v_5; + cursor = limit_backward; // do, line 223 + v_6 = cursor; + lab7: do { + // call postlude, line 223 + if (!r_postlude()) + { + break lab7; + } + } while (false); + cursor = v_6; + return true; + } + +} + diff --git a/ccm-ldn-terms/test/src/com/arsdigita/london/terms/TermsSuite.java b/ccm-ldn-terms/test/src/com/arsdigita/london/terms/TermsSuite.java index e957c730e..9123a1e82 100755 --- a/ccm-ldn-terms/test/src/com/arsdigita/london/terms/TermsSuite.java +++ b/ccm-ldn-terms/test/src/com/arsdigita/london/terms/TermsSuite.java @@ -28,7 +28,7 @@ import com.arsdigita.tools.junit.framework.PackageTestSuite; * * * @author Joseph A. Bank (jbank@alum.mit.edu) - * @version "$Id: TermsSuite.java 287 2005-02-22 00:29:02Z sskracic $ + * @version "$Id: TermsSuite.java 1963 2009-08-16 19:15:12Z pboy $ **/ public class TermsSuite extends PackageTestSuite { public TermsSuite() { diff --git a/ccm-ldn-util/src/com/arsdigita/london/util/DomainObjectCopier.java b/ccm-ldn-util/src/com/arsdigita/london/util/DomainObjectCopier.java index 2d56ec2c4..770de4de9 100755 --- a/ccm-ldn-util/src/com/arsdigita/london/util/DomainObjectCopier.java +++ b/ccm-ldn-util/src/com/arsdigita/london/util/DomainObjectCopier.java @@ -42,13 +42,9 @@ import java.util.Iterator; * Recursively copies a domain object. * * @author Justin Ross <jross@redhat.com> - * @version $Id: DomainObjectCopier.java 755 2005-09-02 13:42:47Z sskracic $ + * @version $Id: DomainObjectCopier.java 1942 2009-05-29 07:53:23Z terry $ */ public class DomainObjectCopier extends DomainService { - public static final String versionId = - "$Id: DomainObjectCopier.java 755 2005-09-02 13:42:47Z sskracic $" + - "$Author: sskracic $" + - "$DateTime: 2004/03/01 09:31:36 $"; private static Logger s_log = Logger.getLogger(DomainObjectCopier.class); diff --git a/ccm-ldn-util/src/com/arsdigita/london/util/Initializer.java b/ccm-ldn-util/src/com/arsdigita/london/util/Initializer.java index 9ba5959a7..f1b470239 100755 --- a/ccm-ldn-util/src/com/arsdigita/london/util/Initializer.java +++ b/ccm-ldn-util/src/com/arsdigita/london/util/Initializer.java @@ -31,13 +31,9 @@ import org.apache.log4j.Logger; * The CMS initializer. * * @author Justin Ross <jross@redhat.com> - * @version $Id: Initializer.java 758 2005-09-02 14:26:56Z sskracic $ + * @version $Id: Initializer.java 1942 2009-05-29 07:53:23Z terry $ */ public class Initializer extends CompoundInitializer { - public final static String versionId = - "$Id: Initializer.java 758 2005-09-02 14:26:56Z sskracic $" + - "$Author: sskracic $" + - "$DateTime: 2004/01/31 11:58:22 $"; private static final Logger s_log = Logger.getLogger (Initializer.class); diff --git a/ccm-ldn-util/src/com/arsdigita/london/util/Loader.java b/ccm-ldn-util/src/com/arsdigita/london/util/Loader.java index 4932e17f1..bc3a79488 100755 --- a/ccm-ldn-util/src/com/arsdigita/london/util/Loader.java +++ b/ccm-ldn-util/src/com/arsdigita/london/util/Loader.java @@ -27,13 +27,9 @@ import org.apache.log4j.Logger; * Loader. * * @author Justin Ross <jross@redhat.com> - * @version $Id: Loader.java 287 2005-02-22 00:29:02Z sskracic $ + * @version $Id: Loader.java 1942 2009-05-29 07:53:23Z terry $ */ public class Loader extends PackageLoader { - public final static String versionId = - "$Id: Loader.java 287 2005-02-22 00:29:02Z sskracic $" + - "$Author: sskracic $" + - "$DateTime: 2003/10/28 14:26:55 $"; private static final Logger s_log = Logger.getLogger(Loader.class); diff --git a/ccm-ldn-util/src/com/arsdigita/london/util/cmd/BulkPublish.java b/ccm-ldn-util/src/com/arsdigita/london/util/cmd/BulkPublish.java index 8e03f0deb..b212cde31 100755 --- a/ccm-ldn-util/src/com/arsdigita/london/util/cmd/BulkPublish.java +++ b/ccm-ldn-util/src/com/arsdigita/london/util/cmd/BulkPublish.java @@ -26,17 +26,13 @@ import com.arsdigita.persistence.Filter; import com.arsdigita.persistence.OID; import com.arsdigita.persistence.SessionManager; import com.arsdigita.persistence.DataCollection; -import com.arsdigita.workflow.simple.Workflow; import com.arsdigita.domain.DomainObjectFactory; import com.arsdigita.cms.ContentPage; import com.arsdigita.cms.ContentItem; -import com.arsdigita.cms.ContentSection; import com.arsdigita.cms.ContentTypeLifecycleDefinition; import com.arsdigita.cms.Folder; -import com.arsdigita.cms.lifecycle.Lifecycle; import com.arsdigita.cms.lifecycle.LifecycleDefinition; -import com.arsdigita.cms.lifecycle.Phase; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.OptionBuilder; @@ -168,10 +164,6 @@ public class BulkPublish extends Program { } }.run(); - final int expiryNotification = ContentSection. - getConfig().getDefaultNotificationTime(); - - final Iterator items = toPublish.iterator(); while (items.hasNext()) { final OID oid = (OID) items.next(); @@ -201,31 +193,7 @@ public class BulkPublish extends Program { return; } - ContentItem pending = item.publish(def, new Date()); - final Lifecycle lifecycle = pending.getLifecycle(); - Date endDate = lifecycle.getEndDate(); - if (expiryNotification > 0) { - - if (endDate != null) { - - Date notificationDate = new Date(endDate.getTime() - (long)expiryNotification * 3600000L); - - Phase expirationImminentPhase = - lifecycle.addCustomPhase("expirationImminent", - new Long(notificationDate.getTime()), - new Long(endDate.getTime())); - expirationImminentPhase. - setListenerClassName("com.arsdigita.cms.lifecycle.NotifyLifecycleListener"); - expirationImminentPhase.save(); - } - } - if (ContentSection.getConfig().getDeleteWorkflowAfterPublication()) { - Workflow workflow = Workflow.getObjectWorkflow(item); - if (workflow != null) { - workflow.delete(); - } - } - + item.publish(def, new Date()); } }; try { diff --git a/ccm-ldn-util/src/com/arsdigita/london/util/cmd/BulkUnpublish.java b/ccm-ldn-util/src/com/arsdigita/london/util/cmd/BulkUnpublish.java index b1d4e318d..b0e9617dd 100755 --- a/ccm-ldn-util/src/com/arsdigita/london/util/cmd/BulkUnpublish.java +++ b/ccm-ldn-util/src/com/arsdigita/london/util/cmd/BulkUnpublish.java @@ -21,7 +21,7 @@ package com.arsdigita.london.util.cmd; import com.arsdigita.london.util.Program; import com.arsdigita.london.util.Transaction; import com.arsdigita.persistence.CompoundFilter; -import com.arsdigita.persistence.Filter; +import com.arsdigita.persistence.Filter; import com.arsdigita.persistence.FilterFactory; import com.arsdigita.persistence.OID; import com.arsdigita.persistence.SessionManager; @@ -30,7 +30,7 @@ import com.arsdigita.domain.DomainObjectFactory; import com.arsdigita.cms.ContentPage; import com.arsdigita.cms.ContentItem; -import com.arsdigita.cms.Folder; +import com.arsdigita.cms.Folder; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.OptionBuilder; @@ -45,10 +45,12 @@ public class BulkUnpublish extends Program { private static final Logger s_log = Logger.getLogger(BulkUnpublish.class); - public BulkUnpublish() { - super("Bulk Unpublish", - "1.0.0", - ""); + private int folderId; + private String[] types; + private boolean ignoreErrors; + + public BulkUnpublish(String name, String version) { + super(name, version, ""); Options options = getOptions(); @@ -56,15 +58,15 @@ public class BulkUnpublish extends Program { OptionBuilder .hasArgs() .withLongOpt( "types" ) - .withDescription( "Restrict unpublishing to items of the specified content types" ) + .withDescription( "Restrict operation to items of the specified content types" ) .create( "t" ) ); - options.addOption( - OptionBuilder - .hasArg() - .withLongOpt( "restrictToFolderId" ) - .withDescription( "Restrict publishing to items within the folder with the specified id" ) - .create( "f" ) ); - + options.addOption( + OptionBuilder + .hasArg() + .withLongOpt( "restrictToFolderId" ) + .withDescription( "Restrict operation to items within the folder with the specified id" ) + .create( "f" ) ); + options.addOption (OptionBuilder .hasArg(false) @@ -74,49 +76,57 @@ public class BulkUnpublish extends Program { } protected void doRun(CommandLine cmdLine) { - final int folderId; - final String[] types; - final boolean ignoreErrors = cmdLine.hasOption("i"); + this.ignoreErrors = cmdLine.hasOption("i"); if( cmdLine.hasOption( "t" ) ) { - types = cmdLine.getOptionValues( "t" ); + this.types = cmdLine.getOptionValues( "t" ); System.out.println( "Unpublishing live items of types:" ); - for( int i = 0; i < types.length; i++ ) { - System.out.println( types[i] ); + for( int i = 0; i < this.types.length; i++ ) { + System.out.println( this.types[i] ); } } else { - types = null; + this.types = null; System.out.println( "Unpublishing all live items" ); } - if (cmdLine.hasOption("f")) { - folderId = Integer.parseInt(cmdLine.getOptionValue("f")); - Folder folder = new Folder(new OID(Folder.BASE_DATA_OBJECT_TYPE, folderId)); - System.out.println( "Unpublishing items in folder: " + folder.getDisplayName()); - } else { - folderId = -1; - } + if (cmdLine.hasOption("f")) { + this.folderId = Integer.parseInt(cmdLine.getOptionValue("f")); + Folder folder = new Folder(new OID(Folder.BASE_DATA_OBJECT_TYPE, this.folderId)); + System.out.println( "Unpublishing items in folder: " + folder.getDisplayName()); + } else { + this.folderId = -1; + } + + final List toProcess = getListToProcess(true); + unpublish(toProcess); + } + + public static void main(String[] args) { + new BulkUnpublish("Bulk Unpublish","1.0.0").run(args); + } + + protected List getListToProcess(boolean liveOnly) { + final List toProcess = new ArrayList(); - final List toUnpublish = new ArrayList(); new Transaction() { public void doRun() { DataCollection items = SessionManager.getSession() .retrieve(ContentPage.BASE_DATA_OBJECT_TYPE); items.addNotEqualsFilter("type.id", null); - items.addEqualsFilter("version", ContentItem.LIVE); + if(liveOnly) items.addEqualsFilter("version", ContentItem.LIVE); items.addOrder("title"); - FilterFactory filterFactory = items.getFilterFactory(); - - if (folderId >= 0) { - Filter filter = filterFactory.simple(" ancestors like '%/" + folderId + "/%'"); - items.addFilter(filter); - } - if( null != types ) { - CompoundFilter or = filterFactory.or(); + FilterFactory filterFactory = items.getFilterFactory(); - for( int i = 0; i < types.length; i++ ) { - or.addFilter( filterFactory.equals( "objectType", types[i] ) ); + if (this.folderId >= 0) { + Filter filter = filterFactory.simple(" ancestors like '%/" + this.folderId + "/%'"); + items.addFilter(filter); + } + if( null != this.types ) { + CompoundFilter or = filterFactory.or(); + + for( int i = 0; i < this.types.length; i++ ) { + or.addFilter( filterFactory.equals( "objectType", this.types[i] ) ); } items.addFilter( or ); @@ -124,14 +134,23 @@ public class BulkUnpublish extends Program { while (items.next()) { ContentPage page = (ContentPage) DomainObjectFactory.newInstance(items.getDataObject()); - toUnpublish.add(page.getDraftVersion().getOID()); + toProcess.add(page.getDraftVersion().getOID()); } } }.run(); - final Iterator items = toUnpublish.iterator(); + return toProcess; + } + + protected void unpublish(List toProcess) { + final Iterator items = toProcess.iterator(); while (items.hasNext()) { final OID oid = (OID) items.next(); + unpublish(oid); + } + } + + protected void unpublish(OID oid) { Transaction txn = new Transaction() { public void doRun() { ContentPage item = (ContentPage) @@ -146,15 +165,9 @@ public class BulkUnpublish extends Program { txn.run(); } catch (Throwable ex) { s_log.error("Cannot unpublish " + oid, ex); - if (!ignoreErrors) { + if (!this.ignoreErrors) { return; } } - } } - - public static void main(String[] args) { - new BulkUnpublish().run(args); - } - } diff --git a/ccm-ldn-util/src/com/arsdigita/london/util/ui/parameters/URLParameter.java b/ccm-ldn-util/src/com/arsdigita/london/util/ui/parameters/URLParameter.java index fe28135aa..742bc5b5e 100755 --- a/ccm-ldn-util/src/com/arsdigita/london/util/ui/parameters/URLParameter.java +++ b/ccm-ldn-util/src/com/arsdigita/london/util/ui/parameters/URLParameter.java @@ -28,11 +28,11 @@ import javax.servlet.http.HttpServletRequest; * Bebop URLParameter is useless because it merely * check URL form, doesn't actually return a * java.net.URL object. + * + * @version $Id: URLParameter.java 755 2005-09-02 13:42:47Z sskracic $ */ public class URLParameter extends ParameterModel { - public static final String versionId = "$Id: URLParameter.java 755 2005-09-02 13:42:47Z sskracic $ by $Author: sskracic $, $DateTime: 2004/05/10 14:49:43 $"; - public URLParameter(String name) { super(name); }