Synchronisieren mit r2079-2082.
git-svn-id: https://svn.libreccm.org/ccm/trunk@368 8810af33-2d31-482b-a856-94f89814c4dfmaster
parent
f6b0f9685a
commit
7faab110f4
|
|
@ -1,34 +1,34 @@
|
|||
// Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or modify it under
|
||||
// the terms of the GNU Lesser General Public License as published by the Free
|
||||
// Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
// details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
model com.arsdigita.london.terms.indexing;
|
||||
|
||||
import com.arsdigita.kernel.ACSObject;
|
||||
import com.arsdigita.kernel.Party;
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
|
||||
// Persistent object type Indexer, a subtype of ACSObject. All of its
// attributes are mapped to columns of the trm_domains_indexer table.
object type Indexer extends ACSObject {
|
||||
// Mandatory ([1..1]) BLOB attribute stored in the "filter" column.
Blob[1..1] filter = trm_domains_indexer.filter BLOB;
|
||||
// Mandatory timestamp stored in the "last_modified_date" column.
Date[1..1] lastModifiedDate = trm_domains_indexer.last_modified_date TIMESTAMP;
|
||||
// Mandatory reference to a Party, joined through last_modified_user -> parties.party_id.
Party[1..1] lastModifiedUser = join trm_domains_indexer.last_modified_user to parties.party_id;
|
||||
// indexer_id is declared as the reference key of this type.
// NOTE(review): presumably the column that links a row to its ACSObject supertype row - confirm.
reference key (trm_domains_indexer.indexer_id);
|
||||
}
|
||||
|
||||
// Association between a Domain and its Indexer, joined on the "key" columns
// of trm_domains and trm_domains_indexer.
association {
|
||||
// A domain has at most one indexer ([0..1]); the role is declared as a component.
component Indexer[0..1] indexer = join trm_domains.key to trm_domains_indexer.key;
|
||||
// Every indexer belongs to exactly one domain ([1..1]).
Domain[1..1] domain = join trm_domains_indexer.key to trm_domains.key;
|
||||
}
|
||||
|
||||
// Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or modify it under
|
||||
// the terms of the GNU Lesser General Public License as published by the Free
|
||||
// Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
// details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
model com.arsdigita.london.terms.indexing;
|
||||
|
||||
import com.arsdigita.kernel.ACSObject;
|
||||
import com.arsdigita.kernel.Party;
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
|
||||
// Persistent object type Indexer, a subtype of ACSObject. All of its
// attributes are mapped to columns of the trm_domains_indexer table.
object type Indexer extends ACSObject {
|
||||
// Mandatory ([1..1]) BLOB attribute stored in the "filter" column.
Blob[1..1] filter = trm_domains_indexer.filter BLOB;
|
||||
// Mandatory timestamp stored in the "last_modified_date" column.
Date[1..1] lastModifiedDate = trm_domains_indexer.last_modified_date TIMESTAMP;
|
||||
// Mandatory reference to a Party, joined through last_modified_user -> parties.party_id.
Party[1..1] lastModifiedUser = join trm_domains_indexer.last_modified_user to parties.party_id;
|
||||
// indexer_id is declared as the reference key of this type.
// NOTE(review): presumably the column that links a row to its ACSObject supertype row - confirm.
reference key (trm_domains_indexer.indexer_id);
|
||||
}
|
||||
|
||||
// Association between a Domain and its Indexer, joined on the "key" columns
// of trm_domains and trm_domains_indexer.
association {
|
||||
// A domain has at most one indexer ([0..1]); the role is declared as a component.
component Indexer[0..1] indexer = join trm_domains.key to trm_domains_indexer.key;
|
||||
// Every indexer belongs to exactly one domain ([1..1]).
Domain[1..1] domain = join trm_domains_indexer.key to trm_domains.key;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
-- Adds the two foreign keys of trm_domains_indexer:
--   last_modified_user -> parties(party_id)
--   key                -> trm_domains(key)
alter table trm_domains_indexer add
|
||||
constraint trm_dom_ind_las_mod_us_f_1k1i3 foreign key (last_modified_user)
|
||||
references parties(party_id);
|
||||
alter table trm_domains_indexer add
|
||||
constraint trm_domain_indexer_key_f_lghsq foreign key (key)
|
||||
references trm_domains(key);
|
||||
-- NOTE(review): the statements below repeat the two above verbatim; this looks
-- like the second side of the rendered diff rather than a second execution - confirm.
alter table trm_domains_indexer add
|
||||
constraint trm_dom_ind_las_mod_us_f_1k1i3 foreign key (last_modified_user)
|
||||
references parties(party_id);
|
||||
alter table trm_domains_indexer add
|
||||
constraint trm_domain_indexer_key_f_lghsq foreign key (key)
|
||||
references trm_domains(key);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
-- Converts trm_terms.unique_id into a varchar(128) column, in this order:
--   1. add the new string column (default 'UNKNOWN', not null)
--   2. rename the existing unique_id out of the way, then rename the new column into place
--   3. copy the old values across
--   4. recreate the unique constraint on (domain, unique_id)
--   5. drop the old column
-- The statement order matters: each step depends on the renames before it.
alter table trm_terms add (unique_id_string varchar(128) default 'UNKNOWN' not null );
|
||||
alter table trm_terms rename column unique_id to unique_id_old;
|
||||
alter table trm_terms rename column unique_id_string to unique_id;
|
||||
-- NOTE(review): relies on the database converting the old values to varchar implicitly - confirm.
update trm_terms set unique_id = unique_id_old;
|
||||
alter table trm_terms drop constraint trm_ter_domai_uniqu_id_u_6sito;
|
||||
alter table trm_terms add constraint trm_ter_domai_uniqu_id_u_6sito unique(domain, unique_id);
|
||||
alter table trm_terms drop column unique_id_old;
|
||||
-- NOTE(review): the statements below repeat the ones above verbatim; this looks
-- like the second side of the rendered diff rather than a second execution - confirm.
alter table trm_terms add (unique_id_string varchar(128) default 'UNKNOWN' not null );
|
||||
alter table trm_terms rename column unique_id to unique_id_old;
|
||||
alter table trm_terms rename column unique_id_string to unique_id;
|
||||
update trm_terms set unique_id = unique_id_old;
|
||||
alter table trm_terms drop constraint trm_ter_domai_uniqu_id_u_6sito;
|
||||
alter table trm_terms add constraint trm_ter_domai_uniqu_id_u_6sito unique(domain, unique_id);
|
||||
alter table trm_terms drop column unique_id_old;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
-- Adds the two foreign keys of trm_domains_indexer:
--   last_modified_user -> parties(party_id)
--   key                -> trm_domains(key)
alter table trm_domains_indexer add
|
||||
constraint trm_dom_ind_las_mod_us_f_1k1i3 foreign key (last_modified_user)
|
||||
references parties(party_id);
|
||||
alter table trm_domains_indexer add
|
||||
constraint trm_domain_indexer_key_f_lghsq foreign key (key)
|
||||
references trm_domains(key);
|
||||
-- NOTE(review): the statements below repeat the two above verbatim; this looks
-- like the second side of the rendered diff rather than a second execution - confirm.
alter table trm_domains_indexer add
|
||||
constraint trm_dom_ind_las_mod_us_f_1k1i3 foreign key (last_modified_user)
|
||||
references parties(party_id);
|
||||
alter table trm_domains_indexer add
|
||||
constraint trm_domain_indexer_key_f_lghsq foreign key (key)
|
||||
references trm_domains(key);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
-- Converts trm_terms.unique_id into a varchar(128) column, in this order:
--   1. add the new string column (default 'UNKNOWN', not null)
--   2. rename the existing unique_id out of the way, then rename the new column into place
--   3. copy the old values across
--   4. recreate the unique constraint on (domain, unique_id)
--   5. drop the old column
-- The statement order matters: each step depends on the renames before it.
alter table trm_terms add unique_id_string varchar(128) default 'UNKNOWN' not null;
|
||||
alter table trm_terms rename column unique_id to unique_id_old;
|
||||
alter table trm_terms rename column unique_id_string to unique_id;
|
||||
-- NOTE(review): relies on the database converting the old values to varchar on assignment - confirm.
update trm_terms set unique_id = unique_id_old;
|
||||
alter table trm_terms drop constraint trm_ter_domai_uniqu_id_u_6sito;
|
||||
alter table trm_terms add constraint trm_ter_domai_uniqu_id_u_6sito unique(domain, unique_id);
|
||||
alter table trm_terms drop column unique_id_old;
|
||||
-- NOTE(review): the statements below repeat the ones above verbatim; this looks
-- like the second side of the rendered diff rather than a second execution - confirm.
alter table trm_terms add unique_id_string varchar(128) default 'UNKNOWN' not null;
|
||||
alter table trm_terms rename column unique_id to unique_id_old;
|
||||
alter table trm_terms rename column unique_id_string to unique_id;
|
||||
update trm_terms set unique_id = unique_id_old;
|
||||
alter table trm_terms drop constraint trm_ter_domai_uniqu_id_u_6sito;
|
||||
alter table trm_terms add constraint trm_ter_domai_uniqu_id_u_6sito unique(domain, unique_id);
|
||||
alter table trm_terms drop column unique_id_old;
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ import com.arsdigita.web.ApplicationType;
|
|||
* Loader.
|
||||
*
|
||||
* @author Justin Ross <jross@redhat.com>
|
||||
* @version $Id: Loader.java 287 2005-02-22 00:29:02Z sskracic $
|
||||
* @version $Id: Loader.java 1878 2009-04-21 13:56:23Z terry $
|
||||
*/
|
||||
public class Loader extends PackageLoader {
|
||||
public void run(final ScriptContext ctx) {
|
||||
|
|
|
|||
|
|
@ -1,44 +1,44 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Comparator;
|
||||
|
||||
/**
|
||||
* Comparators for sorting {@link Term} objects.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
public class TermComparators {
|
||||
|
||||
/**
|
||||
* Compare two {@link Term} object by name, case insensitive.
|
||||
*/
|
||||
public static class OrderByName implements Comparator<Term>, Serializable {
|
||||
public int compare(Term o1, Term o2) {
|
||||
int compare = o1.getName().toLowerCase().compareTo(o2.getName().toLowerCase());
|
||||
if (compare == 0) {
|
||||
compare = o1.getUniqueID().compareTo(o2.getUniqueID());
|
||||
}
|
||||
return compare;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Comparator;
|
||||
|
||||
/**
|
||||
* Comparators for sorting {@link Term} objects.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
public class TermComparators {
|
||||
|
||||
/**
|
||||
* Compare two {@link Term} object by name, case insensitive.
|
||||
*/
|
||||
public static class OrderByName implements Comparator<Term>, Serializable {
|
||||
public int compare(Term o1, Term o2) {
|
||||
int compare = o1.getName().toLowerCase().compareTo(o2.getName().toLowerCase());
|
||||
if (compare == 0) {
|
||||
compare = o1.getUniqueID().compareTo(o2.getUniqueID());
|
||||
}
|
||||
return compare;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
package com.arsdigita.london.terms;
|
||||
|
||||
import com.arsdigita.persistence.DataQuery;
|
||||
import com.arsdigita.domain.DomainCollection;
|
||||
import com.arsdigita.persistence.SessionManager;
|
||||
import com.arsdigita.util.UncheckedWrapperException;
|
||||
import com.arsdigita.web.Application;
|
||||
|
|
@ -33,5 +34,24 @@ public class Util {
|
|||
|
||||
return applicationDomain;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* retrieve a unique integer to allocate to a new term.
|
||||
* Useful for applications that dynamically generate terms.
|
||||
*/
|
||||
|
||||
public static String getNextTermID(Domain domain) {
|
||||
|
||||
DomainCollection terms = domain.getTerms();
|
||||
terms.addOrder(Term.UNIQUE_ID + " desc");
|
||||
int id = 1;
|
||||
if(terms.next()) {
|
||||
Term other = (Term) terms.getDomainObject();
|
||||
id = Integer.parseInt(other.getUniqueID()) + 1;
|
||||
terms.close();
|
||||
}
|
||||
return Integer.toString(id);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,85 +1,85 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.importer.skos;
|
||||
|
||||
import java.sql.Date;
|
||||
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import com.arsdigita.kernel.Kernel;
|
||||
import com.arsdigita.kernel.KernelExcursion;
|
||||
import com.arsdigita.london.util.Transaction;
|
||||
import com.arsdigita.packaging.Program;
|
||||
import com.arsdigita.util.WrappedError;
|
||||
|
||||
/**
|
||||
* A tool for importing domains from an SKOS file.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
public class DomainImportTool extends Program {
|
||||
public DomainImportTool() {
|
||||
super("Domain importer", "1.0.0", "skos-filename.rdf key title description version YYYY-MM-DD");
|
||||
}
|
||||
|
||||
protected void doRun(CommandLine cmdLine) {
|
||||
final String[] args = cmdLine.getArgs();
|
||||
if (args.length != 6) {
|
||||
help(System.err);
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
s_log.info("Importing domain from SKOS file " + args[0]);
|
||||
|
||||
final String key = args[1];
|
||||
final String title = args[2];
|
||||
final String description = args[3];
|
||||
final String version = args[4];
|
||||
final Date released = Date.valueOf(args[5]);
|
||||
|
||||
Transaction txn = new Transaction() {
|
||||
protected void doRun() {
|
||||
DomainParser parser = new DomainParser(key, title, description, version, released);
|
||||
parser.parse(args[0]);
|
||||
}
|
||||
};
|
||||
try {
|
||||
txn.run();
|
||||
} catch (RuntimeException e) {
|
||||
s_log.error("RDF importer failed unexpectedly", e);
|
||||
throw e;
|
||||
} catch (WrappedError e) {
|
||||
s_log.error("RDF importer failed unexpectedly", e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(final String[] args) {
|
||||
new KernelExcursion() {
|
||||
public void excurse() {
|
||||
setEffectiveParty(Kernel.getSystemParty());
|
||||
new DomainImportTool().run(args);
|
||||
}
|
||||
}.run();
|
||||
}
|
||||
|
||||
private static final Logger s_log = Logger.getLogger(DomainImportTool.class);
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.importer.skos;
|
||||
|
||||
import java.sql.Date;
|
||||
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import com.arsdigita.kernel.Kernel;
|
||||
import com.arsdigita.kernel.KernelExcursion;
|
||||
import com.arsdigita.london.util.Transaction;
|
||||
import com.arsdigita.packaging.Program;
|
||||
import com.arsdigita.util.WrappedError;
|
||||
|
||||
/**
|
||||
* A tool for importing domains from an SKOS file.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
public class DomainImportTool extends Program {
|
||||
public DomainImportTool() {
|
||||
super("Domain importer", "1.0.0", "skos-filename.rdf key title description version YYYY-MM-DD");
|
||||
}
|
||||
|
||||
protected void doRun(CommandLine cmdLine) {
|
||||
final String[] args = cmdLine.getArgs();
|
||||
if (args.length != 6) {
|
||||
help(System.err);
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
s_log.info("Importing domain from SKOS file " + args[0]);
|
||||
|
||||
final String key = args[1];
|
||||
final String title = args[2];
|
||||
final String description = args[3];
|
||||
final String version = args[4];
|
||||
final Date released = Date.valueOf(args[5]);
|
||||
|
||||
Transaction txn = new Transaction() {
|
||||
protected void doRun() {
|
||||
DomainParser parser = new DomainParser(key, title, description, version, released);
|
||||
parser.parse(args[0]);
|
||||
}
|
||||
};
|
||||
try {
|
||||
txn.run();
|
||||
} catch (RuntimeException e) {
|
||||
s_log.error("RDF importer failed unexpectedly", e);
|
||||
throw e;
|
||||
} catch (WrappedError e) {
|
||||
s_log.error("RDF importer failed unexpectedly", e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(final String[] args) {
|
||||
new KernelExcursion() {
|
||||
public void excurse() {
|
||||
setEffectiveParty(Kernel.getSystemParty());
|
||||
new DomainImportTool().run(args);
|
||||
}
|
||||
}.run();
|
||||
}
|
||||
|
||||
private static final Logger s_log = Logger.getLogger(DomainImportTool.class);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,95 +1,95 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.importer.skos;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.sql.Date;
|
||||
|
||||
import org.apache.commons.digester.Digester;
|
||||
import org.apache.commons.digester.Rule;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import com.arsdigita.util.UncheckedWrapperException;
|
||||
|
||||
/**
|
||||
* Parses an SKOS-formatted file using a {@link Digester}.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class DomainParser {
|
||||
public DomainParser(final String key, final String title, final String description, final String version,
|
||||
final Date released) {
|
||||
m_digester = new Digester();
|
||||
|
||||
m_digester.push(this);
|
||||
m_digester.setNamespaceAware(true);
|
||||
m_digester.setRuleNamespaceURI(Namespaces.SKOS);
|
||||
|
||||
m_digester.addRule("RDF/ConceptScheme", new Rule() {
|
||||
public void begin(String namespace, String name, Attributes attrs) throws Exception {
|
||||
URL url = Utils.extractAbout(attrs);
|
||||
m_builder.createDomain(key, url, title, description, version, released);
|
||||
}
|
||||
});
|
||||
|
||||
m_digester.addFactoryCreate("RDF/Concept", m_builder.newPendingTermFactory());
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/prefLabel", "setPreferredLabel", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/prefLabel", 0);
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/altLabel", "addAlternateLabel", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/altLabel", 0);
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/broader", "addBroaderTerm", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/broader", 0, "rdf:resource");
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/broader/Concept", "addBroaderTerm", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/broader/Concept", 0, "rdf:about");
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/related", "addRelatedTerm", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/related", 0, "rdf:resource");
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/related/Concept", "addRelatedTerm", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/related/Concept", 0, "rdf:about");
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/narrower", "addNarrowerTerm", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/narrower", 0, "rdf:resource");
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/narrower/Concept", "addNarrowerTerm", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/narrower/Concept", 0, "rdf:about");
|
||||
}
|
||||
|
||||
public void parse(String file) {
|
||||
try {
|
||||
m_digester.parse(file);
|
||||
m_builder.build();
|
||||
} catch (IOException ex) {
|
||||
throw new UncheckedWrapperException("cannot parse " + file, ex);
|
||||
} catch (SAXException ex) {
|
||||
throw new UncheckedWrapperException("cannot parse " + file, ex);
|
||||
}
|
||||
}
|
||||
|
||||
private final DomainBuilder m_builder = new DomainBuilder();
|
||||
|
||||
private final Digester m_digester;
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.importer.skos;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.sql.Date;
|
||||
|
||||
import org.apache.commons.digester.Digester;
|
||||
import org.apache.commons.digester.Rule;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import com.arsdigita.util.UncheckedWrapperException;
|
||||
|
||||
/**
|
||||
* Parses an SKOS-formatted file using a {@link Digester}.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class DomainParser {
|
||||
public DomainParser(final String key, final String title, final String description, final String version,
|
||||
final Date released) {
|
||||
m_digester = new Digester();
|
||||
|
||||
m_digester.push(this);
|
||||
m_digester.setNamespaceAware(true);
|
||||
m_digester.setRuleNamespaceURI(Namespaces.SKOS);
|
||||
|
||||
m_digester.addRule("RDF/ConceptScheme", new Rule() {
|
||||
public void begin(String namespace, String name, Attributes attrs) throws Exception {
|
||||
URL url = Utils.extractAbout(attrs);
|
||||
m_builder.createDomain(key, url, title, description, version, released);
|
||||
}
|
||||
});
|
||||
|
||||
m_digester.addFactoryCreate("RDF/Concept", m_builder.newPendingTermFactory());
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/prefLabel", "setPreferredLabel", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/prefLabel", 0);
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/altLabel", "addAlternateLabel", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/altLabel", 0);
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/broader", "addBroaderTerm", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/broader", 0, "rdf:resource");
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/broader/Concept", "addBroaderTerm", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/broader/Concept", 0, "rdf:about");
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/related", "addRelatedTerm", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/related", 0, "rdf:resource");
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/related/Concept", "addRelatedTerm", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/related/Concept", 0, "rdf:about");
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/narrower", "addNarrowerTerm", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/narrower", 0, "rdf:resource");
|
||||
|
||||
m_digester.addCallMethod("RDF/Concept/narrower/Concept", "addNarrowerTerm", 1, new Class[] { String.class });
|
||||
m_digester.addCallParam("RDF/Concept/narrower/Concept", 0, "rdf:about");
|
||||
}
|
||||
|
||||
public void parse(String file) {
|
||||
try {
|
||||
m_digester.parse(file);
|
||||
m_builder.build();
|
||||
} catch (IOException ex) {
|
||||
throw new UncheckedWrapperException("cannot parse " + file, ex);
|
||||
} catch (SAXException ex) {
|
||||
throw new UncheckedWrapperException("cannot parse " + file, ex);
|
||||
}
|
||||
}
|
||||
|
||||
private final DomainBuilder m_builder = new DomainBuilder();
|
||||
|
||||
private final Digester m_digester;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,31 +1,31 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.importer.skos;
|
||||
|
||||
/**
|
||||
* Declares the namespaces for SKOS and RDF.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
final class Namespaces {

    /** Namespace URI of the RDF syntax vocabulary. */
    public static final String RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

    /** Namespace URI of the SKOS core vocabulary. */
    public static final String SKOS = "http://www.w3.org/2004/02/skos/core#";

    /** Constants holder only; instances are never needed. */
    private Namespaces() {
    }
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.importer.skos;
|
||||
|
||||
/**
|
||||
* Declares the namespaces for SKOS and RDF.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
final class Namespaces {

    /** Namespace URI of the RDF syntax vocabulary. */
    public static final String RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

    /** Namespace URI of the SKOS core vocabulary. */
    public static final String SKOS = "http://www.w3.org/2004/02/skos/core#";

    /** Constants holder only; instances are never needed. */
    private Namespaces() {
    }
}
|
||||
|
|
|
|||
|
|
@ -1,65 +1,65 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.importer.skos;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import com.arsdigita.util.Assert;
|
||||
|
||||
/**
|
||||
* A simple progress bar that outputs 0%, 5%, 10%, using a specified logger.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class ProgressBar {
|
||||
public ProgressBar(int length, Logger logger) {
|
||||
Assert.isTrue(length > 0);
|
||||
Assert.isTrue(logger != null);
|
||||
|
||||
m_length = length;
|
||||
m_logger = logger;
|
||||
m_position = 0;
|
||||
m_percentage = -1;
|
||||
}
|
||||
|
||||
public void next() {
|
||||
m_position++;
|
||||
|
||||
int percentage = (m_position * 100) / m_length;
|
||||
|
||||
if (percentage != m_percentage && percentage % 5 == 0) {
|
||||
m_percentage = percentage;
|
||||
m_logger.info(" " + m_percentage + "% (" + m_position + ")");
|
||||
}
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
m_position = 0;
|
||||
m_percentage = -1;
|
||||
}
|
||||
|
||||
private int m_position;
|
||||
|
||||
private int m_percentage;
|
||||
|
||||
private final int m_length;
|
||||
|
||||
private final Logger m_logger;
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.importer.skos;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import com.arsdigita.util.Assert;
|
||||
|
||||
/**
|
||||
* A simple progress bar that outputs 0%, 5%, 10%, using a specified logger.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class ProgressBar {
|
||||
public ProgressBar(int length, Logger logger) {
|
||||
Assert.isTrue(length > 0);
|
||||
Assert.isTrue(logger != null);
|
||||
|
||||
m_length = length;
|
||||
m_logger = logger;
|
||||
m_position = 0;
|
||||
m_percentage = -1;
|
||||
}
|
||||
|
||||
public void next() {
|
||||
m_position++;
|
||||
|
||||
int percentage = (m_position * 100) / m_length;
|
||||
|
||||
if (percentage != m_percentage && percentage % 5 == 0) {
|
||||
m_percentage = percentage;
|
||||
m_logger.info(" " + m_percentage + "% (" + m_position + ")");
|
||||
}
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
m_position = 0;
|
||||
m_percentage = -1;
|
||||
}
|
||||
|
||||
private int m_position;
|
||||
|
||||
private int m_percentage;
|
||||
|
||||
private final int m_length;
|
||||
|
||||
private final Logger m_logger;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,175 +1,175 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.importer.skos;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
import com.arsdigita.london.terms.Term;
|
||||
import com.arsdigita.persistence.SessionManager;
|
||||
import com.arsdigita.util.Assert;
|
||||
|
||||
/**
|
||||
* Builds a polyhierarchy of {@link Term} objects.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class TermBuilder {
|
||||
private static final Logger s_log = Logger.getLogger(TermBuilder.class);
|
||||
|
||||
public TermBuilder(URL url) {
|
||||
Assert.exists(url);
|
||||
m_url = url;
|
||||
m_uniqueID = Utils.extractUniqueID(url);
|
||||
m_preferredLabel = m_uniqueID;
|
||||
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("Pending term is " + m_url + " with unique ID " + m_uniqueID);
|
||||
}
|
||||
}
|
||||
|
||||
public void setPreferredLabel(String preferredLabel) {
|
||||
m_preferredLabel = preferredLabel;
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("--> Preferred label is " + m_preferredLabel);
|
||||
}
|
||||
}
|
||||
|
||||
public void addAlternateLabel(String alternateLabel) {
|
||||
m_alternateLabels.add(alternateLabel);
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("--> An alternate label is " + alternateLabel);
|
||||
}
|
||||
}
|
||||
|
||||
public void addBroaderTerm(String url) throws MalformedURLException {
|
||||
m_broaderTerms.add(new URL(url));
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("--> Broader term is " + url);
|
||||
}
|
||||
}
|
||||
|
||||
public void addRelatedTerm(String url) throws MalformedURLException {
|
||||
m_relatedTerms.add(new URL(url));
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("--> Related term is " + url);
|
||||
}
|
||||
}
|
||||
|
||||
public void addNarrowerTerm(String url) throws MalformedURLException {
|
||||
m_narrowerTerms.add(new URL(url));
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("--> Narrower term is " + url);
|
||||
}
|
||||
}
|
||||
|
||||
public void buildTerm(Domain domain, Map<String, Term> termCache) {
|
||||
Assert.exists(domain);
|
||||
Assert.exists(termCache);
|
||||
Assert.exists(m_preferredLabel);
|
||||
Assert.exists(m_uniqueID);
|
||||
|
||||
if (m_preferredLabel.equals(m_uniqueID)) {
|
||||
s_log.warn("Preferred label was missing for concept " + m_uniqueID);
|
||||
}
|
||||
|
||||
final boolean inAtoZ = false;
|
||||
final String name = m_preferredLabel;
|
||||
final String shortcut = null;
|
||||
|
||||
Term term = (Term) termCache.get(m_uniqueID);
|
||||
|
||||
if (term != null) {
|
||||
term.setName(name);
|
||||
term.setInAtoZ(inAtoZ);
|
||||
term.setShortcut(shortcut);
|
||||
} else {
|
||||
term = Term.create(m_uniqueID, name, inAtoZ, shortcut, domain);
|
||||
termCache.put(term.getUniqueID(), term);
|
||||
}
|
||||
term.save();
|
||||
}
|
||||
|
||||
public void buildPolyhierarchy(Domain domain, Map<String, Term> termCache) {
|
||||
Term thisTerm = (Term) termCache.get(m_uniqueID);
|
||||
|
||||
if (m_broaderTerms.isEmpty()) {
|
||||
domain.addRootTerm(thisTerm);
|
||||
}
|
||||
|
||||
for (Iterator<URL> i = m_broaderTerms.iterator(); i.hasNext();) {
|
||||
URL url = i.next();
|
||||
String uniqueID = Utils.extractUniqueID(url);
|
||||
Term targetTerm = (Term) termCache.get(uniqueID);
|
||||
if (targetTerm == null) {
|
||||
s_log.warn("Narrower term " + uniqueID + " (" + url + ") does not exist");
|
||||
} else {
|
||||
targetTerm.addNarrowerTerm(thisTerm, true, true);
|
||||
}
|
||||
}
|
||||
|
||||
for (Iterator<URL> i = m_relatedTerms.iterator(); i.hasNext();) {
|
||||
URL url = i.next();
|
||||
String uniqueID = Utils.extractUniqueID(url);
|
||||
Term targetTerm = (Term) termCache.get(uniqueID);
|
||||
if (targetTerm == null) {
|
||||
s_log.warn("Related term " + uniqueID + " (" + url + ") does not exist");
|
||||
} else {
|
||||
thisTerm.addRelatedTerm(targetTerm);
|
||||
}
|
||||
}
|
||||
|
||||
for (Iterator<String> i = m_alternateLabels.iterator(); i.hasNext();) {
|
||||
String alternateLabel = i.next();
|
||||
String uniqueID = String.valueOf(s_next_synonym_id--);
|
||||
Term targetTerm = Term.create(uniqueID, alternateLabel, false, null, domain);
|
||||
targetTerm.addPreferredTerm(thisTerm);
|
||||
}
|
||||
|
||||
SessionManager.getSession().flushAll();
|
||||
}
|
||||
|
||||
public URL getURL() {
|
||||
return m_url;
|
||||
}
|
||||
|
||||
private final URL m_url;
|
||||
|
||||
private final String m_uniqueID;
|
||||
|
||||
private String m_preferredLabel;
|
||||
|
||||
private final List<String> m_alternateLabels = new ArrayList<String>();
|
||||
|
||||
private final List<URL> m_broaderTerms = new ArrayList<URL>();
|
||||
|
||||
private final List<URL> m_relatedTerms = new ArrayList<URL>();
|
||||
|
||||
private final List<URL> m_narrowerTerms = new ArrayList<URL>();
|
||||
|
||||
private static int s_next_synonym_id = Integer.MAX_VALUE;
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.importer.skos;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
import com.arsdigita.london.terms.Term;
|
||||
import com.arsdigita.persistence.SessionManager;
|
||||
import com.arsdigita.util.Assert;
|
||||
|
||||
/**
|
||||
* Builds a polyhierarchy of {@link Term} objects.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class TermBuilder {
|
||||
private static final Logger s_log = Logger.getLogger(TermBuilder.class);
|
||||
|
||||
public TermBuilder(URL url) {
|
||||
Assert.exists(url);
|
||||
m_url = url;
|
||||
m_uniqueID = Utils.extractUniqueID(url);
|
||||
m_preferredLabel = m_uniqueID;
|
||||
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("Pending term is " + m_url + " with unique ID " + m_uniqueID);
|
||||
}
|
||||
}
|
||||
|
||||
public void setPreferredLabel(String preferredLabel) {
|
||||
m_preferredLabel = preferredLabel;
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("--> Preferred label is " + m_preferredLabel);
|
||||
}
|
||||
}
|
||||
|
||||
public void addAlternateLabel(String alternateLabel) {
|
||||
m_alternateLabels.add(alternateLabel);
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("--> An alternate label is " + alternateLabel);
|
||||
}
|
||||
}
|
||||
|
||||
public void addBroaderTerm(String url) throws MalformedURLException {
|
||||
m_broaderTerms.add(new URL(url));
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("--> Broader term is " + url);
|
||||
}
|
||||
}
|
||||
|
||||
public void addRelatedTerm(String url) throws MalformedURLException {
|
||||
m_relatedTerms.add(new URL(url));
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("--> Related term is " + url);
|
||||
}
|
||||
}
|
||||
|
||||
public void addNarrowerTerm(String url) throws MalformedURLException {
|
||||
m_narrowerTerms.add(new URL(url));
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("--> Narrower term is " + url);
|
||||
}
|
||||
}
|
||||
|
||||
public void buildTerm(Domain domain, Map<String, Term> termCache) {
|
||||
Assert.exists(domain);
|
||||
Assert.exists(termCache);
|
||||
Assert.exists(m_preferredLabel);
|
||||
Assert.exists(m_uniqueID);
|
||||
|
||||
if (m_preferredLabel.equals(m_uniqueID)) {
|
||||
s_log.warn("Preferred label was missing for concept " + m_uniqueID);
|
||||
}
|
||||
|
||||
final boolean inAtoZ = false;
|
||||
final String name = m_preferredLabel;
|
||||
final String shortcut = null;
|
||||
|
||||
Term term = (Term) termCache.get(m_uniqueID);
|
||||
|
||||
if (term != null) {
|
||||
term.setName(name);
|
||||
term.setInAtoZ(inAtoZ);
|
||||
term.setShortcut(shortcut);
|
||||
} else {
|
||||
term = Term.create(m_uniqueID, name, inAtoZ, shortcut, domain);
|
||||
termCache.put(term.getUniqueID(), term);
|
||||
}
|
||||
term.save();
|
||||
}
|
||||
|
||||
public void buildPolyhierarchy(Domain domain, Map<String, Term> termCache) {
|
||||
Term thisTerm = (Term) termCache.get(m_uniqueID);
|
||||
|
||||
if (m_broaderTerms.isEmpty()) {
|
||||
domain.addRootTerm(thisTerm);
|
||||
}
|
||||
|
||||
for (Iterator<URL> i = m_broaderTerms.iterator(); i.hasNext();) {
|
||||
URL url = i.next();
|
||||
String uniqueID = Utils.extractUniqueID(url);
|
||||
Term targetTerm = (Term) termCache.get(uniqueID);
|
||||
if (targetTerm == null) {
|
||||
s_log.warn("Narrower term " + uniqueID + " (" + url + ") does not exist");
|
||||
} else {
|
||||
targetTerm.addNarrowerTerm(thisTerm, true, true);
|
||||
}
|
||||
}
|
||||
|
||||
for (Iterator<URL> i = m_relatedTerms.iterator(); i.hasNext();) {
|
||||
URL url = i.next();
|
||||
String uniqueID = Utils.extractUniqueID(url);
|
||||
Term targetTerm = (Term) termCache.get(uniqueID);
|
||||
if (targetTerm == null) {
|
||||
s_log.warn("Related term " + uniqueID + " (" + url + ") does not exist");
|
||||
} else {
|
||||
thisTerm.addRelatedTerm(targetTerm);
|
||||
}
|
||||
}
|
||||
|
||||
for (Iterator<String> i = m_alternateLabels.iterator(); i.hasNext();) {
|
||||
String alternateLabel = i.next();
|
||||
String uniqueID = String.valueOf(s_next_synonym_id--);
|
||||
Term targetTerm = Term.create(uniqueID, alternateLabel, false, null, domain);
|
||||
targetTerm.addPreferredTerm(thisTerm);
|
||||
}
|
||||
|
||||
SessionManager.getSession().flushAll();
|
||||
}
|
||||
|
||||
public URL getURL() {
|
||||
return m_url;
|
||||
}
|
||||
|
||||
private final URL m_url;
|
||||
|
||||
private final String m_uniqueID;
|
||||
|
||||
private String m_preferredLabel;
|
||||
|
||||
private final List<String> m_alternateLabels = new ArrayList<String>();
|
||||
|
||||
private final List<URL> m_broaderTerms = new ArrayList<URL>();
|
||||
|
||||
private final List<URL> m_relatedTerms = new ArrayList<URL>();
|
||||
|
||||
private final List<URL> m_narrowerTerms = new ArrayList<URL>();
|
||||
|
||||
private static int s_next_synonym_id = Integer.MAX_VALUE;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,79 +1,79 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.importer.skos;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
|
||||
import org.xml.sax.Attributes;
|
||||
|
||||
import com.arsdigita.util.StringUtils;
|
||||
|
||||
/**
|
||||
* Utilities for parsing SKOS files.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class Utils {
|
||||
/**
|
||||
* Extract the <code>rdf:about</code> attribute from the attributes of an element.
|
||||
*
|
||||
* @param attrs the attributes containing the <code>rdf:about</code> attribute.
|
||||
*
|
||||
* @return the URL for the <code>rdf:about</code>
|
||||
*
|
||||
* @throws MalformedURLException if the <code>rdf:about</code> attribute is malformed
|
||||
*/
|
||||
public static URL extractAbout(Attributes attrs) throws MalformedURLException {
|
||||
return new URL(attrs.getValue(Namespaces.RDF, "about"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the unique ID from the URL.
|
||||
*
|
||||
* <p>Examples:</p>
|
||||
* <ul>
|
||||
* <li><code>http://www.fao.org/aos/agrovoc#c_3</code> » <b>c_3</b>
|
||||
* <li><code>http://iaaa.cps.unizar.es/thesaurus/T5_INFORMATION AND COMMUNICATION</code> » <b>T5_INFORMATION AND COMMUNICATION</b>
|
||||
* <li><code>http://www.eionet.eu.int/gemet/concept/3395</code> » <b>3395</b>
|
||||
* </ul>
|
||||
*
|
||||
* @param url the URL identifying the term
|
||||
*
|
||||
* @return the unique ID
|
||||
*/
|
||||
public static String extractUniqueID(URL url) {
|
||||
String uniqueID = null;
|
||||
|
||||
if (!StringUtils.emptyString(url.getRef())) {
|
||||
uniqueID = url.getRef();
|
||||
} else {
|
||||
uniqueID = url.toExternalForm();
|
||||
while (uniqueID.endsWith("/")) {
|
||||
uniqueID = uniqueID.substring(0, uniqueID.length() - 1);
|
||||
}
|
||||
int lastSlashIndex = uniqueID.lastIndexOf('/');
|
||||
if (lastSlashIndex > 0) {
|
||||
uniqueID = uniqueID.substring(lastSlashIndex + 1);
|
||||
}
|
||||
}
|
||||
return uniqueID;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.importer.skos;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
|
||||
import org.xml.sax.Attributes;
|
||||
|
||||
import com.arsdigita.util.StringUtils;
|
||||
|
||||
/**
|
||||
* Utilities for parsing SKOS files.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class Utils {
|
||||
/**
|
||||
* Extract the <code>rdf:about</code> attribute from the attributes of an element.
|
||||
*
|
||||
* @param attrs the attributes containing the <code>rdf:about</code> attribute.
|
||||
*
|
||||
* @return the URL for the <code>rdf:about</code>
|
||||
*
|
||||
* @throws MalformedURLException if the <code>rdf:about</code> attribute is malformed
|
||||
*/
|
||||
public static URL extractAbout(Attributes attrs) throws MalformedURLException {
|
||||
return new URL(attrs.getValue(Namespaces.RDF, "about"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the unique ID from the URL.
|
||||
*
|
||||
* <p>Examples:</p>
|
||||
* <ul>
|
||||
* <li><code>http://www.fao.org/aos/agrovoc#c_3</code> » <b>c_3</b>
|
||||
* <li><code>http://iaaa.cps.unizar.es/thesaurus/T5_INFORMATION AND COMMUNICATION</code> » <b>T5_INFORMATION AND COMMUNICATION</b>
|
||||
* <li><code>http://www.eionet.eu.int/gemet/concept/3395</code> » <b>3395</b>
|
||||
* </ul>
|
||||
*
|
||||
* @param url the URL identifying the term
|
||||
*
|
||||
* @return the unique ID
|
||||
*/
|
||||
public static String extractUniqueID(URL url) {
|
||||
String uniqueID = null;
|
||||
|
||||
if (!StringUtils.emptyString(url.getRef())) {
|
||||
uniqueID = url.getRef();
|
||||
} else {
|
||||
uniqueID = url.toExternalForm();
|
||||
while (uniqueID.endsWith("/")) {
|
||||
uniqueID = uniqueID.substring(0, uniqueID.length() - 1);
|
||||
}
|
||||
int lastSlashIndex = uniqueID.lastIndexOf('/');
|
||||
if (lastSlashIndex > 0) {
|
||||
uniqueID = uniqueID.substring(lastSlashIndex + 1);
|
||||
}
|
||||
}
|
||||
return uniqueID;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,52 +1,52 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
||||
import com.arsdigita.london.terms.Term;
|
||||
import com.arsdigita.util.Assert;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
public class RankedTerm {
|
||||
|
||||
private final Term m_term;
|
||||
|
||||
private final BigDecimal m_ranking;
|
||||
|
||||
public RankedTerm(Term term, BigDecimal ranking) {
|
||||
Assert.exists(term);
|
||||
Assert.exists(ranking);
|
||||
Assert.isTrue(ranking.compareTo(BigDecimal.ONE) < 0);
|
||||
Assert.isTrue(ranking.compareTo(BigDecimal.ZERO) > 0);
|
||||
m_term = term;
|
||||
m_ranking = ranking.setScale(4, BigDecimal.ROUND_HALF_DOWN);
|
||||
}
|
||||
|
||||
public Term getTerm() {
|
||||
return m_term;
|
||||
}
|
||||
|
||||
public BigDecimal getRanking() {
|
||||
return m_ranking;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
||||
import com.arsdigita.london.terms.Term;
|
||||
import com.arsdigita.util.Assert;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
public class RankedTerm {
|
||||
|
||||
private final Term m_term;
|
||||
|
||||
private final BigDecimal m_ranking;
|
||||
|
||||
public RankedTerm(Term term, BigDecimal ranking) {
|
||||
Assert.exists(term);
|
||||
Assert.exists(ranking);
|
||||
Assert.isTrue(ranking.compareTo(BigDecimal.ONE) < 0);
|
||||
Assert.isTrue(ranking.compareTo(BigDecimal.ZERO) > 0);
|
||||
m_term = term;
|
||||
m_ranking = ranking.setScale(4, BigDecimal.ROUND_HALF_DOWN);
|
||||
}
|
||||
|
||||
public Term getTerm() {
|
||||
return m_term;
|
||||
}
|
||||
|
||||
public BigDecimal getRanking() {
|
||||
return m_ranking;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,150 +1,150 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import kea.filters.KEAFilter;
|
||||
import kea.stemmers.PorterStemmer;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import weka.core.Attribute;
|
||||
import weka.core.FastVector;
|
||||
import weka.core.Instance;
|
||||
import weka.core.Instances;
|
||||
|
||||
import com.arsdigita.cms.ContentItem;
|
||||
import com.arsdigita.domain.DomainCollection;
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
import com.arsdigita.london.terms.Term;
|
||||
import com.arsdigita.london.terms.indexing.IndexingConfig;
|
||||
import com.arsdigita.search.ContentProvider;
|
||||
import com.arsdigita.search.ContentType;
|
||||
import com.arsdigita.search.MetadataProvider;
|
||||
import com.arsdigita.search.MetadataProviderRegistry;
|
||||
import com.arsdigita.util.Assert;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class FilterBuilder {
|
||||
private static final Logger s_log = Logger.getLogger(FilterBuilder.class);
|
||||
|
||||
private final Domain m_domain;
|
||||
|
||||
private final String m_language;
|
||||
|
||||
private final IndexingConfig m_config = IndexingConfig.getInstance();
|
||||
|
||||
public FilterBuilder(Domain domain, String language) {
|
||||
Assert.exists(domain);
|
||||
Assert.exists(language);
|
||||
|
||||
m_domain = domain;
|
||||
m_language = language;
|
||||
}
|
||||
|
||||
public KEAFilter build() {
|
||||
|
||||
try {
|
||||
FastVector atts = new FastVector(2);
|
||||
atts.addElement(new Attribute("doc", (FastVector) null));
|
||||
atts.addElement(new Attribute("keyphrases", (FastVector) null));
|
||||
Instances data = new Instances("keyphrase_training_data", atts, 0);
|
||||
|
||||
// Build model
|
||||
KEAFilter filter = new KEAFilter();
|
||||
|
||||
filter.setDebug(false);
|
||||
filter.setDisallowInternalPeriods(m_config.disallowInternalPeriods());
|
||||
filter.setKFused(m_config.keyphraseFrequencyEnabled());
|
||||
filter.setMaxPhraseLength(m_config.getMaxPhraseLength());
|
||||
filter.setMinPhraseLength(m_config.getMinPhraseLength());
|
||||
filter.setMinNumOccur(m_config.getMinPhraseOccurrences());
|
||||
filter.setCheckForProperNouns(m_config.checkForProperNouns());
|
||||
filter.setStemmer(new PorterStemmer());
|
||||
filter.setDocumentLanguage(m_language);
|
||||
filter.setVocabulary(m_domain.getKey());
|
||||
filter.setVocabularyFormat("aplaws");
|
||||
filter.setStopwords(new Stopwords(m_language));
|
||||
filter.setInputFormat(data);
|
||||
filter.setNumFeature();
|
||||
filter.m_Vocabulary = VocabularyCache.getVocabulary(m_domain, filter.getDocumentLanguage());
|
||||
|
||||
s_log.debug("Reading the training content... ");
|
||||
Queries.TrainingItems items = new Queries.TrainingItems(m_domain, filter.getDocumentLanguage());
|
||||
items.setRange(0, m_config.getMaxTrainingItems() + 1);
|
||||
|
||||
try {
|
||||
while (items.next()) {
|
||||
ContentItem item = new ContentItem(items.getID());
|
||||
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug(" --> Reading " + item.getName());
|
||||
}
|
||||
|
||||
double[] newInst = new double[2];
|
||||
|
||||
// Text content
|
||||
MetadataProvider adapter = MetadataProviderRegistry.findAdapter(item.getObjectType());
|
||||
ContentProvider[] content = adapter.getContent(item, ContentType.TEXT);
|
||||
StringBuffer buf = new StringBuffer();
|
||||
for (int i = 0, n = content.length; i < n; i++) {
|
||||
if (content[i].getType().equals(ContentType.TEXT)) {
|
||||
buf.append(new String(content[i].getBytes()));
|
||||
}
|
||||
}
|
||||
newInst[0] = (double) data.attribute(0).addStringValue(buf.toString());
|
||||
|
||||
// Assigned terms
|
||||
StringBuffer keyStr = new StringBuffer();
|
||||
DomainCollection terms = m_domain.getTerms();
|
||||
try {
|
||||
terms.addEqualsFilter("model.childObjects.contentChildren", item.getID());
|
||||
|
||||
while (terms.next()) {
|
||||
Term nextTerm = (Term) terms.getDomainObject();
|
||||
keyStr.append(nextTerm.getModel().getName().toUpperCase());
|
||||
keyStr.append("\n");
|
||||
}
|
||||
} finally {
|
||||
terms.close();
|
||||
}
|
||||
newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString());
|
||||
|
||||
// Train
|
||||
data.add(new Instance(1.0, newInst));
|
||||
filter.input(data.instance(0));
|
||||
data = data.stringFreeStructure();
|
||||
}
|
||||
} finally {
|
||||
items.close();
|
||||
}
|
||||
|
||||
filter.batchFinished();
|
||||
|
||||
while ((filter.output()) != null) {
|
||||
// Nothing to do here!
|
||||
}
|
||||
return filter;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import kea.filters.KEAFilter;
|
||||
import kea.stemmers.PorterStemmer;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import weka.core.Attribute;
|
||||
import weka.core.FastVector;
|
||||
import weka.core.Instance;
|
||||
import weka.core.Instances;
|
||||
|
||||
import com.arsdigita.cms.ContentItem;
|
||||
import com.arsdigita.domain.DomainCollection;
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
import com.arsdigita.london.terms.Term;
|
||||
import com.arsdigita.london.terms.indexing.IndexingConfig;
|
||||
import com.arsdigita.search.ContentProvider;
|
||||
import com.arsdigita.search.ContentType;
|
||||
import com.arsdigita.search.MetadataProvider;
|
||||
import com.arsdigita.search.MetadataProviderRegistry;
|
||||
import com.arsdigita.util.Assert;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class FilterBuilder {
|
||||
private static final Logger s_log = Logger.getLogger(FilterBuilder.class);
|
||||
|
||||
private final Domain m_domain;
|
||||
|
||||
private final String m_language;
|
||||
|
||||
private final IndexingConfig m_config = IndexingConfig.getInstance();
|
||||
|
||||
public FilterBuilder(Domain domain, String language) {
|
||||
Assert.exists(domain);
|
||||
Assert.exists(language);
|
||||
|
||||
m_domain = domain;
|
||||
m_language = language;
|
||||
}
|
||||
|
||||
public KEAFilter build() {
|
||||
|
||||
try {
|
||||
FastVector atts = new FastVector(2);
|
||||
atts.addElement(new Attribute("doc", (FastVector) null));
|
||||
atts.addElement(new Attribute("keyphrases", (FastVector) null));
|
||||
Instances data = new Instances("keyphrase_training_data", atts, 0);
|
||||
|
||||
// Build model
|
||||
KEAFilter filter = new KEAFilter();
|
||||
|
||||
filter.setDebug(false);
|
||||
filter.setDisallowInternalPeriods(m_config.disallowInternalPeriods());
|
||||
filter.setKFused(m_config.keyphraseFrequencyEnabled());
|
||||
filter.setMaxPhraseLength(m_config.getMaxPhraseLength());
|
||||
filter.setMinPhraseLength(m_config.getMinPhraseLength());
|
||||
filter.setMinNumOccur(m_config.getMinPhraseOccurrences());
|
||||
filter.setCheckForProperNouns(m_config.checkForProperNouns());
|
||||
filter.setStemmer(new PorterStemmer());
|
||||
filter.setDocumentLanguage(m_language);
|
||||
filter.setVocabulary(m_domain.getKey());
|
||||
filter.setVocabularyFormat("aplaws");
|
||||
filter.setStopwords(new Stopwords(m_language));
|
||||
filter.setInputFormat(data);
|
||||
filter.setNumFeature();
|
||||
filter.m_Vocabulary = VocabularyCache.getVocabulary(m_domain, filter.getDocumentLanguage());
|
||||
|
||||
s_log.debug("Reading the training content... ");
|
||||
Queries.TrainingItems items = new Queries.TrainingItems(m_domain, filter.getDocumentLanguage());
|
||||
items.setRange(0, m_config.getMaxTrainingItems() + 1);
|
||||
|
||||
try {
|
||||
while (items.next()) {
|
||||
ContentItem item = new ContentItem(items.getID());
|
||||
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug(" --> Reading " + item.getName());
|
||||
}
|
||||
|
||||
double[] newInst = new double[2];
|
||||
|
||||
// Text content
|
||||
MetadataProvider adapter = MetadataProviderRegistry.findAdapter(item.getObjectType());
|
||||
ContentProvider[] content = adapter.getContent(item, ContentType.TEXT);
|
||||
StringBuffer buf = new StringBuffer();
|
||||
for (int i = 0, n = content.length; i < n; i++) {
|
||||
if (content[i].getType().equals(ContentType.TEXT)) {
|
||||
buf.append(new String(content[i].getBytes()));
|
||||
}
|
||||
}
|
||||
newInst[0] = (double) data.attribute(0).addStringValue(buf.toString());
|
||||
|
||||
// Assigned terms
|
||||
StringBuffer keyStr = new StringBuffer();
|
||||
DomainCollection terms = m_domain.getTerms();
|
||||
try {
|
||||
terms.addEqualsFilter("model.childObjects.contentChildren", item.getID());
|
||||
|
||||
while (terms.next()) {
|
||||
Term nextTerm = (Term) terms.getDomainObject();
|
||||
keyStr.append(nextTerm.getModel().getName().toUpperCase());
|
||||
keyStr.append("\n");
|
||||
}
|
||||
} finally {
|
||||
terms.close();
|
||||
}
|
||||
newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString());
|
||||
|
||||
// Train
|
||||
data.add(new Instance(1.0, newInst));
|
||||
filter.input(data.instance(0));
|
||||
data = data.stringFreeStructure();
|
||||
}
|
||||
} finally {
|
||||
items.close();
|
||||
}
|
||||
|
||||
filter.batchFinished();
|
||||
|
||||
while ((filter.output()) != null) {
|
||||
// Nothing to do here!
|
||||
}
|
||||
return filter;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,61 +1,61 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import kea.filters.KEAFilter;
|
||||
|
||||
import com.arsdigita.caching.CacheTable;
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
import com.arsdigita.util.Assert;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class FilterCache {
|
||||
|
||||
static KEAFilter getFilter(Domain domain, String language) {
|
||||
Assert.exists(domain);
|
||||
Assert.exists(language);
|
||||
|
||||
String key = domain.getKey() + "_" + language;
|
||||
KEAFilter filter = (KEAFilter) s_cache.get(key);
|
||||
if (filter == null) {
|
||||
FilterBuilder builder = new FilterBuilder(domain, language);
|
||||
filter = builder.build();
|
||||
s_cache.put(key, filter);
|
||||
}
|
||||
return filter;
|
||||
}
|
||||
|
||||
public static KEAFilter recreateFilter(Domain domain, String language) {
|
||||
Assert.exists(domain);
|
||||
Assert.exists(language);
|
||||
|
||||
String key = domain.getKey() + "_" + language;
|
||||
s_cache.remove(key);
|
||||
return getFilter(domain, language);
|
||||
}
|
||||
|
||||
public static void reset() {
|
||||
s_cache.removeAll();
|
||||
}
|
||||
|
||||
private static final CacheTable s_cache = new CacheTable("FilterCache", false);
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import kea.filters.KEAFilter;
|
||||
|
||||
import com.arsdigita.caching.CacheTable;
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
import com.arsdigita.util.Assert;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class FilterCache {
|
||||
|
||||
static KEAFilter getFilter(Domain domain, String language) {
|
||||
Assert.exists(domain);
|
||||
Assert.exists(language);
|
||||
|
||||
String key = domain.getKey() + "_" + language;
|
||||
KEAFilter filter = (KEAFilter) s_cache.get(key);
|
||||
if (filter == null) {
|
||||
FilterBuilder builder = new FilterBuilder(domain, language);
|
||||
filter = builder.build();
|
||||
s_cache.put(key, filter);
|
||||
}
|
||||
return filter;
|
||||
}
|
||||
|
||||
public static KEAFilter recreateFilter(Domain domain, String language) {
|
||||
Assert.exists(domain);
|
||||
Assert.exists(language);
|
||||
|
||||
String key = domain.getKey() + "_" + language;
|
||||
s_cache.remove(key);
|
||||
return getFilter(domain, language);
|
||||
}
|
||||
|
||||
public static void reset() {
|
||||
s_cache.removeAll();
|
||||
}
|
||||
|
||||
private static final CacheTable s_cache = new CacheTable("FilterCache", false);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,124 +1,124 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import kea.filters.KEAFilter;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import weka.core.Attribute;
|
||||
import weka.core.FastVector;
|
||||
import weka.core.Instance;
|
||||
import weka.core.Instances;
|
||||
|
||||
import com.arsdigita.cms.ContentItem;
|
||||
import com.arsdigita.cms.TextPage;
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
import com.arsdigita.london.terms.Term;
|
||||
import com.arsdigita.london.terms.indexing.Indexer;
|
||||
import com.arsdigita.london.terms.indexing.RankedTerm;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
public class IndexerService {
|
||||
private static final Logger s_log = Logger.getLogger(Indexer.class);
|
||||
|
||||
public List<RankedTerm> controlledIndex(Object f, Domain domain, int maxTerms, ContentItem item) {
|
||||
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("Extracting index from " + item);
|
||||
}
|
||||
final long t1 = System.currentTimeMillis();
|
||||
|
||||
KEAFilter filter = (KEAFilter) f;
|
||||
filter.setNumPhrases(maxTerms);
|
||||
|
||||
FastVector atts = new FastVector(3);
|
||||
atts.addElement(new Attribute("doc", (FastVector) null));
|
||||
atts.addElement(new Attribute("keyphrases", (FastVector) null));
|
||||
atts.addElement(new Attribute("filename", (String) null));
|
||||
Instances data = new Instances("keyphrase_training_data", atts, 0);
|
||||
|
||||
// Extract keyphrases
|
||||
StringBuffer txtStr = new StringBuffer();
|
||||
txtStr.append(((TextPage) item).getTextAsset().getText());
|
||||
|
||||
double[] newInst = new double[2];
|
||||
newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
|
||||
newInst[1] = Instance.missingValue();
|
||||
|
||||
data.add(new Instance(1.0, newInst));
|
||||
try {
|
||||
filter.input(data.instance(0));
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
data = data.stringFreeStructure();
|
||||
Instance[] topRankedInstances = new Instance[filter.getNumPhrases()];
|
||||
Instance inst;
|
||||
|
||||
// Iterating over all extracted keyphrases (inst)
|
||||
while ((inst = filter.output()) != null) {
|
||||
int index = (int) inst.value(filter.getRankIndex()) - 1;
|
||||
if (index < filter.getNumPhrases()) {
|
||||
topRankedInstances[index] = inst;
|
||||
}
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug(inst.toString());
|
||||
}
|
||||
}
|
||||
|
||||
// Extract the unique ID's of the matching keyphrases
|
||||
List<RankedTerm> terms = new ArrayList<RankedTerm>();
|
||||
for (int i = 0; i < filter.getNumPhrases(); i++) {
|
||||
if (topRankedInstances[i] != null) {
|
||||
String uniqueID = topRankedInstances[i].stringValue(filter.getStemmedPhraseIndex());
|
||||
BigDecimal ranking = BigDecimal.valueOf(topRankedInstances[i].value(filter.getProbabilityIndex()));
|
||||
Term term = domain.getTerm(uniqueID);
|
||||
terms.add(new RankedTerm(term, ranking));
|
||||
}
|
||||
}
|
||||
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("Extracted index from " + item + " in " + (System.currentTimeMillis() - t1) + "ms");
|
||||
}
|
||||
return terms;
|
||||
}
|
||||
|
||||
public Object train(Domain domain, String language) {
|
||||
final long t1 = System.currentTimeMillis();
|
||||
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("Training indexer for domain " + domain.getKey() + "...");
|
||||
}
|
||||
KEAFilter filter = FilterCache.recreateFilter(domain, language);
|
||||
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("Trained indexer for domain " + domain.getKey() + " in " + (System.currentTimeMillis() - t1)
|
||||
+ "ms");
|
||||
}
|
||||
return filter;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import kea.filters.KEAFilter;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import weka.core.Attribute;
|
||||
import weka.core.FastVector;
|
||||
import weka.core.Instance;
|
||||
import weka.core.Instances;
|
||||
|
||||
import com.arsdigita.cms.ContentItem;
|
||||
import com.arsdigita.cms.TextPage;
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
import com.arsdigita.london.terms.Term;
|
||||
import com.arsdigita.london.terms.indexing.Indexer;
|
||||
import com.arsdigita.london.terms.indexing.RankedTerm;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
public class IndexerService {
|
||||
private static final Logger s_log = Logger.getLogger(Indexer.class);
|
||||
|
||||
public List<RankedTerm> controlledIndex(Object f, Domain domain, int maxTerms, ContentItem item) {
|
||||
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("Extracting index from " + item);
|
||||
}
|
||||
final long t1 = System.currentTimeMillis();
|
||||
|
||||
KEAFilter filter = (KEAFilter) f;
|
||||
filter.setNumPhrases(maxTerms);
|
||||
|
||||
FastVector atts = new FastVector(3);
|
||||
atts.addElement(new Attribute("doc", (FastVector) null));
|
||||
atts.addElement(new Attribute("keyphrases", (FastVector) null));
|
||||
atts.addElement(new Attribute("filename", (String) null));
|
||||
Instances data = new Instances("keyphrase_training_data", atts, 0);
|
||||
|
||||
// Extract keyphrases
|
||||
StringBuffer txtStr = new StringBuffer();
|
||||
txtStr.append(((TextPage) item).getTextAsset().getText());
|
||||
|
||||
double[] newInst = new double[2];
|
||||
newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
|
||||
newInst[1] = Instance.missingValue();
|
||||
|
||||
data.add(new Instance(1.0, newInst));
|
||||
try {
|
||||
filter.input(data.instance(0));
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
data = data.stringFreeStructure();
|
||||
Instance[] topRankedInstances = new Instance[filter.getNumPhrases()];
|
||||
Instance inst;
|
||||
|
||||
// Iterating over all extracted keyphrases (inst)
|
||||
while ((inst = filter.output()) != null) {
|
||||
int index = (int) inst.value(filter.getRankIndex()) - 1;
|
||||
if (index < filter.getNumPhrases()) {
|
||||
topRankedInstances[index] = inst;
|
||||
}
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug(inst.toString());
|
||||
}
|
||||
}
|
||||
|
||||
// Extract the unique ID's of the matching keyphrases
|
||||
List<RankedTerm> terms = new ArrayList<RankedTerm>();
|
||||
for (int i = 0; i < filter.getNumPhrases(); i++) {
|
||||
if (topRankedInstances[i] != null) {
|
||||
String uniqueID = topRankedInstances[i].stringValue(filter.getStemmedPhraseIndex());
|
||||
BigDecimal ranking = BigDecimal.valueOf(topRankedInstances[i].value(filter.getProbabilityIndex()));
|
||||
Term term = domain.getTerm(uniqueID);
|
||||
terms.add(new RankedTerm(term, ranking));
|
||||
}
|
||||
}
|
||||
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("Extracted index from " + item + " in " + (System.currentTimeMillis() - t1) + "ms");
|
||||
}
|
||||
return terms;
|
||||
}
|
||||
|
||||
public Object train(Domain domain, String language) {
|
||||
final long t1 = System.currentTimeMillis();
|
||||
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("Training indexer for domain " + domain.getKey() + "...");
|
||||
}
|
||||
KEAFilter filter = FilterCache.recreateFilter(domain, language);
|
||||
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug("Trained indexer for domain " + domain.getKey() + " in " + (System.currentTimeMillis() - t1)
|
||||
+ "ms");
|
||||
}
|
||||
return filter;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,123 +1,123 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
||||
import com.arsdigita.categorization.Category;
|
||||
import com.arsdigita.cms.ContentItem;
|
||||
import com.arsdigita.domain.DomainQuery;
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
import com.arsdigita.london.terms.Term;
|
||||
|
||||
/**
|
||||
* Queries for performance optimisation of keyphrase extraction.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class Queries {
|
||||
|
||||
static class TrainingItems extends DomainQuery {
|
||||
|
||||
public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getTrainingItems";
|
||||
public static final String ITEM_ID = ContentItem.ID;
|
||||
|
||||
TrainingItems(Domain domain, String language) {
|
||||
super(QUERY_NAME);
|
||||
setParameter(Term.DOMAIN, domain.getKey());
|
||||
setParameter(ContentItem.LANGUAGE, language);
|
||||
}
|
||||
|
||||
public BigDecimal getID() {
|
||||
return (BigDecimal) get(ITEM_ID);
|
||||
}
|
||||
}
|
||||
|
||||
static class PreferredTerms extends DomainQuery {
|
||||
|
||||
public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getPreferredTerms";
|
||||
public static final String UNIQUE_ID = Term.UNIQUE_ID;
|
||||
public static final String NAME = Category.NAME;
|
||||
|
||||
PreferredTerms(Domain domain) {
|
||||
super(QUERY_NAME);
|
||||
setParameter("domain", domain.getKey());
|
||||
}
|
||||
|
||||
public String getUniqueID() {
|
||||
return String.valueOf(get(UNIQUE_ID));
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return (String) get(NAME);
|
||||
}
|
||||
}
|
||||
|
||||
static class NonPreferredTerms extends DomainQuery {
|
||||
|
||||
public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getNonPreferredTerms";
|
||||
public static final String UNIQUE_ID = Term.UNIQUE_ID;
|
||||
public static final String NAME = Category.NAME;
|
||||
public static final String PREFERRED_UNIQUE_ID = "preferredUniqueID";
|
||||
|
||||
NonPreferredTerms(Domain domain) {
|
||||
super(QUERY_NAME);
|
||||
setParameter("domain", domain.getKey());
|
||||
}
|
||||
|
||||
public String getUniqueID() {
|
||||
return String.valueOf(get(UNIQUE_ID));
|
||||
}
|
||||
|
||||
public String getPreferredUniqueID() {
|
||||
return String.valueOf(get(PREFERRED_UNIQUE_ID));
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return (String) get(NAME);
|
||||
}
|
||||
}
|
||||
|
||||
static class RelatedTerms extends DomainQuery {
|
||||
|
||||
public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getRelatedTerms";
|
||||
public static final String UNIQUE_ID = Term.UNIQUE_ID;
|
||||
public static final String RELATED_UNIQUE_ID = "relatedUniqueID";
|
||||
public static final String RELATION_TYPE = Category.REL_TYPE;
|
||||
|
||||
RelatedTerms(Domain domain) {
|
||||
super(QUERY_NAME);
|
||||
setParameter("domain", domain.getKey());
|
||||
}
|
||||
|
||||
public String getUniqueID() {
|
||||
return String.valueOf(get(UNIQUE_ID));
|
||||
}
|
||||
|
||||
public String getRelatedUniqueID() {
|
||||
return String.valueOf(get(RELATED_UNIQUE_ID));
|
||||
}
|
||||
|
||||
public String getRelationType() {
|
||||
return (String) get(RELATION_TYPE);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
||||
import com.arsdigita.categorization.Category;
|
||||
import com.arsdigita.cms.ContentItem;
|
||||
import com.arsdigita.domain.DomainQuery;
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
import com.arsdigita.london.terms.Term;
|
||||
|
||||
/**
|
||||
* Queries for performance optimisation of keyphrase extraction.
|
||||
*
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class Queries {
|
||||
|
||||
static class TrainingItems extends DomainQuery {
|
||||
|
||||
public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getTrainingItems";
|
||||
public static final String ITEM_ID = ContentItem.ID;
|
||||
|
||||
TrainingItems(Domain domain, String language) {
|
||||
super(QUERY_NAME);
|
||||
setParameter(Term.DOMAIN, domain.getKey());
|
||||
setParameter(ContentItem.LANGUAGE, language);
|
||||
}
|
||||
|
||||
public BigDecimal getID() {
|
||||
return (BigDecimal) get(ITEM_ID);
|
||||
}
|
||||
}
|
||||
|
||||
static class PreferredTerms extends DomainQuery {
|
||||
|
||||
public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getPreferredTerms";
|
||||
public static final String UNIQUE_ID = Term.UNIQUE_ID;
|
||||
public static final String NAME = Category.NAME;
|
||||
|
||||
PreferredTerms(Domain domain) {
|
||||
super(QUERY_NAME);
|
||||
setParameter("domain", domain.getKey());
|
||||
}
|
||||
|
||||
public String getUniqueID() {
|
||||
return String.valueOf(get(UNIQUE_ID));
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return (String) get(NAME);
|
||||
}
|
||||
}
|
||||
|
||||
static class NonPreferredTerms extends DomainQuery {
|
||||
|
||||
public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getNonPreferredTerms";
|
||||
public static final String UNIQUE_ID = Term.UNIQUE_ID;
|
||||
public static final String NAME = Category.NAME;
|
||||
public static final String PREFERRED_UNIQUE_ID = "preferredUniqueID";
|
||||
|
||||
NonPreferredTerms(Domain domain) {
|
||||
super(QUERY_NAME);
|
||||
setParameter("domain", domain.getKey());
|
||||
}
|
||||
|
||||
public String getUniqueID() {
|
||||
return String.valueOf(get(UNIQUE_ID));
|
||||
}
|
||||
|
||||
public String getPreferredUniqueID() {
|
||||
return String.valueOf(get(PREFERRED_UNIQUE_ID));
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return (String) get(NAME);
|
||||
}
|
||||
}
|
||||
|
||||
static class RelatedTerms extends DomainQuery {
|
||||
|
||||
public static final String QUERY_NAME = "com.arsdigita.london.terms.indexing.getRelatedTerms";
|
||||
public static final String UNIQUE_ID = Term.UNIQUE_ID;
|
||||
public static final String RELATED_UNIQUE_ID = "relatedUniqueID";
|
||||
public static final String RELATION_TYPE = Category.REL_TYPE;
|
||||
|
||||
RelatedTerms(Domain domain) {
|
||||
super(QUERY_NAME);
|
||||
setParameter("domain", domain.getKey());
|
||||
}
|
||||
|
||||
public String getUniqueID() {
|
||||
return String.valueOf(get(UNIQUE_ID));
|
||||
}
|
||||
|
||||
public String getRelatedUniqueID() {
|
||||
return String.valueOf(get(RELATED_UNIQUE_ID));
|
||||
}
|
||||
|
||||
public String getRelationType() {
|
||||
return (String) get(RELATION_TYPE);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,57 +1,57 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class Stopwords extends kea.stopwords.Stopwords {
|
||||
|
||||
public Stopwords(String language) throws IOException {
|
||||
String resource = getClass().getPackage().getName().replace('.', '/') + "/stopwords_" + language + ".txt";
|
||||
URL url = getClass().getClassLoader().getResource(resource);
|
||||
if (url == null) {
|
||||
throw new IOException("Could not find resource " + resource);
|
||||
}
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()));
|
||||
String stopWord = null;
|
||||
m_stopWords = new HashSet<String>();
|
||||
try {
|
||||
while ((stopWord = br.readLine()) != null) {
|
||||
m_stopWords.add(stopWord);
|
||||
}
|
||||
} finally {
|
||||
br.close();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isStopword(String str) {
|
||||
return m_stopWords.contains(str);
|
||||
}
|
||||
|
||||
private final Set<String> m_stopWords;
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class Stopwords extends kea.stopwords.Stopwords {
|
||||
|
||||
public Stopwords(String language) throws IOException {
|
||||
String resource = getClass().getPackage().getName().replace('.', '/') + "/stopwords_" + language + ".txt";
|
||||
URL url = getClass().getClassLoader().getResource(resource);
|
||||
if (url == null) {
|
||||
throw new IOException("Could not find resource " + resource);
|
||||
}
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()));
|
||||
String stopWord = null;
|
||||
m_stopWords = new HashSet<String>();
|
||||
try {
|
||||
while ((stopWord = br.readLine()) != null) {
|
||||
m_stopWords.add(stopWord);
|
||||
}
|
||||
} finally {
|
||||
br.close();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isStopword(String str) {
|
||||
return m_stopWords.contains(str);
|
||||
}
|
||||
|
||||
private final Set<String> m_stopWords;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,167 +1,167 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Vector;
|
||||
|
||||
import kea.stemmers.PorterStemmer;
|
||||
import kea.vocab.Vocabulary;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class VocabularyBuilder {
|
||||
|
||||
private static final Logger s_log = Logger.getLogger(VocabularyBuilder.class);
|
||||
|
||||
private final Domain m_domain;
|
||||
|
||||
private final String m_language;
|
||||
|
||||
public VocabularyBuilder(Domain domain, String language) {
|
||||
m_domain = domain;
|
||||
m_language = language;
|
||||
}
|
||||
|
||||
public Vocabulary build() throws IOException {
|
||||
s_log.info("Building vocabulary for domain " + m_domain.getKey() + "...");
|
||||
|
||||
Vocabulary vocabulary = new Vocabulary(m_domain.getKey(), "aplaws", m_language);
|
||||
vocabulary.setStemmer(new PorterStemmer());
|
||||
vocabulary.setStopwords(new Stopwords(m_language));
|
||||
|
||||
Map vocabularyEN = createMap(vocabulary, "VocabularyEN");
|
||||
Map vocabularyENrev = createMap(vocabulary, "VocabularyENrev");
|
||||
Map vocabularyREL = createMap(vocabulary, "VocabularyREL");
|
||||
Map vocabularyRT = createMap(vocabulary, "VocabularyRT");
|
||||
createMap(vocabulary, "VocabularyUSE");
|
||||
|
||||
Queries.PreferredTerms preferredTerms = new Queries.PreferredTerms(m_domain);
|
||||
try {
|
||||
while (preferredTerms.next()) {
|
||||
String id = preferredTerms.getUniqueID();
|
||||
String descriptor = preferredTerms.getName();
|
||||
String avterm = vocabulary.pseudoPhrase(descriptor);
|
||||
if (avterm == null) {
|
||||
avterm = descriptor;
|
||||
}
|
||||
if (avterm.length() > 1) {
|
||||
vocabularyEN.put(avterm, id);
|
||||
vocabularyENrev.put(id, descriptor);
|
||||
}
|
||||
}
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug(" --> Built " + vocabularyEN.size() + " preferred terms");
|
||||
}
|
||||
} finally {
|
||||
preferredTerms.close();
|
||||
}
|
||||
|
||||
Queries.NonPreferredTerms nonPreferredTerms = new Queries.NonPreferredTerms(m_domain);
|
||||
int count = 1;
|
||||
try {
|
||||
while (nonPreferredTerms.next()) {
|
||||
String preferred_id = nonPreferredTerms.getPreferredUniqueID();
|
||||
String descriptor = nonPreferredTerms.getName();
|
||||
addNonDescriptor(vocabulary, count++, preferred_id, descriptor);
|
||||
}
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug(" --> Built " + count + " non-preferred terms");
|
||||
}
|
||||
} finally {
|
||||
preferredTerms.close();
|
||||
}
|
||||
|
||||
Queries.RelatedTerms relatedTerms = new Queries.RelatedTerms(m_domain);
|
||||
try {
|
||||
while (relatedTerms.next()) {
|
||||
String id = relatedTerms.getUniqueID();
|
||||
String relationType = relatedTerms.getRelationType();
|
||||
String id_related = relatedTerms.getRelatedUniqueID();
|
||||
|
||||
Vector relatedIds = (Vector) vocabularyREL.get(id);
|
||||
if (relatedIds == null) {
|
||||
relatedIds = new Vector();
|
||||
vocabularyREL.put(id, relatedIds);
|
||||
}
|
||||
relatedIds.add(id_related);
|
||||
|
||||
if ("child".equals(relationType)) {
|
||||
vocabularyRT.put(id + "-" + id_related, "narrower");
|
||||
vocabularyRT.put(id_related + "-" + id, "broader");
|
||||
} else {
|
||||
vocabularyRT.put(id + "-" + id_related, "related");
|
||||
vocabularyRT.put(id_related + "-" + id, "related");
|
||||
}
|
||||
}
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug(" --> Built " + vocabularyRT.size() + " relationships");
|
||||
}
|
||||
} finally {
|
||||
preferredTerms.close();
|
||||
}
|
||||
s_log.info("Built vocabulary for domain " + m_domain.getKey());
|
||||
return vocabulary;
|
||||
}
|
||||
|
||||
private Map createMap(Vocabulary vocabulary, String fieldName) {
|
||||
try {
|
||||
Map<String, String> map = new HashMap<String, String>(106033);
|
||||
Field field = vocabulary.getClass().getDeclaredField(fieldName);
|
||||
field.setAccessible(true);
|
||||
field.set(vocabulary, map);
|
||||
return map;
|
||||
} catch (NoSuchFieldException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (SecurityException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void addNonDescriptor(Vocabulary vocabulary, int count, String id_descriptor, String non_descriptor) {
|
||||
|
||||
try {
|
||||
Method addNonDescriptor = vocabulary.getClass().getDeclaredMethod("addNonDescriptor",
|
||||
new Class[] { Integer.TYPE, String.class, String.class });
|
||||
addNonDescriptor.setAccessible(true);
|
||||
addNonDescriptor.invoke(vocabulary, new Object[] { Integer.valueOf(count), id_descriptor, non_descriptor });
|
||||
} catch (SecurityException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (NoSuchMethodException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (InvocationTargetException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Vector;
|
||||
|
||||
import kea.stemmers.PorterStemmer;
|
||||
import kea.vocab.Vocabulary;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class VocabularyBuilder {
|
||||
|
||||
private static final Logger s_log = Logger.getLogger(VocabularyBuilder.class);
|
||||
|
||||
private final Domain m_domain;
|
||||
|
||||
private final String m_language;
|
||||
|
||||
public VocabularyBuilder(Domain domain, String language) {
|
||||
m_domain = domain;
|
||||
m_language = language;
|
||||
}
|
||||
|
||||
public Vocabulary build() throws IOException {
|
||||
s_log.info("Building vocabulary for domain " + m_domain.getKey() + "...");
|
||||
|
||||
Vocabulary vocabulary = new Vocabulary(m_domain.getKey(), "aplaws", m_language);
|
||||
vocabulary.setStemmer(new PorterStemmer());
|
||||
vocabulary.setStopwords(new Stopwords(m_language));
|
||||
|
||||
Map vocabularyEN = createMap(vocabulary, "VocabularyEN");
|
||||
Map vocabularyENrev = createMap(vocabulary, "VocabularyENrev");
|
||||
Map vocabularyREL = createMap(vocabulary, "VocabularyREL");
|
||||
Map vocabularyRT = createMap(vocabulary, "VocabularyRT");
|
||||
createMap(vocabulary, "VocabularyUSE");
|
||||
|
||||
Queries.PreferredTerms preferredTerms = new Queries.PreferredTerms(m_domain);
|
||||
try {
|
||||
while (preferredTerms.next()) {
|
||||
String id = preferredTerms.getUniqueID();
|
||||
String descriptor = preferredTerms.getName();
|
||||
String avterm = vocabulary.pseudoPhrase(descriptor);
|
||||
if (avterm == null) {
|
||||
avterm = descriptor;
|
||||
}
|
||||
if (avterm.length() > 1) {
|
||||
vocabularyEN.put(avterm, id);
|
||||
vocabularyENrev.put(id, descriptor);
|
||||
}
|
||||
}
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug(" --> Built " + vocabularyEN.size() + " preferred terms");
|
||||
}
|
||||
} finally {
|
||||
preferredTerms.close();
|
||||
}
|
||||
|
||||
Queries.NonPreferredTerms nonPreferredTerms = new Queries.NonPreferredTerms(m_domain);
|
||||
int count = 1;
|
||||
try {
|
||||
while (nonPreferredTerms.next()) {
|
||||
String preferred_id = nonPreferredTerms.getPreferredUniqueID();
|
||||
String descriptor = nonPreferredTerms.getName();
|
||||
addNonDescriptor(vocabulary, count++, preferred_id, descriptor);
|
||||
}
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug(" --> Built " + count + " non-preferred terms");
|
||||
}
|
||||
} finally {
|
||||
preferredTerms.close();
|
||||
}
|
||||
|
||||
Queries.RelatedTerms relatedTerms = new Queries.RelatedTerms(m_domain);
|
||||
try {
|
||||
while (relatedTerms.next()) {
|
||||
String id = relatedTerms.getUniqueID();
|
||||
String relationType = relatedTerms.getRelationType();
|
||||
String id_related = relatedTerms.getRelatedUniqueID();
|
||||
|
||||
Vector relatedIds = (Vector) vocabularyREL.get(id);
|
||||
if (relatedIds == null) {
|
||||
relatedIds = new Vector();
|
||||
vocabularyREL.put(id, relatedIds);
|
||||
}
|
||||
relatedIds.add(id_related);
|
||||
|
||||
if ("child".equals(relationType)) {
|
||||
vocabularyRT.put(id + "-" + id_related, "narrower");
|
||||
vocabularyRT.put(id_related + "-" + id, "broader");
|
||||
} else {
|
||||
vocabularyRT.put(id + "-" + id_related, "related");
|
||||
vocabularyRT.put(id_related + "-" + id, "related");
|
||||
}
|
||||
}
|
||||
if (s_log.isDebugEnabled()) {
|
||||
s_log.debug(" --> Built " + vocabularyRT.size() + " relationships");
|
||||
}
|
||||
} finally {
|
||||
preferredTerms.close();
|
||||
}
|
||||
s_log.info("Built vocabulary for domain " + m_domain.getKey());
|
||||
return vocabulary;
|
||||
}
|
||||
|
||||
private Map createMap(Vocabulary vocabulary, String fieldName) {
|
||||
try {
|
||||
Map<String, String> map = new HashMap<String, String>(106033);
|
||||
Field field = vocabulary.getClass().getDeclaredField(fieldName);
|
||||
field.setAccessible(true);
|
||||
field.set(vocabulary, map);
|
||||
return map;
|
||||
} catch (NoSuchFieldException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (SecurityException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void addNonDescriptor(Vocabulary vocabulary, int count, String id_descriptor, String non_descriptor) {
|
||||
|
||||
try {
|
||||
Method addNonDescriptor = vocabulary.getClass().getDeclaredMethod("addNonDescriptor",
|
||||
new Class[] { Integer.TYPE, String.class, String.class });
|
||||
addNonDescriptor.setAccessible(true);
|
||||
addNonDescriptor.invoke(vocabulary, new Object[] { Integer.valueOf(count), id_descriptor, non_descriptor });
|
||||
} catch (SecurityException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (NoSuchMethodException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (InvocationTargetException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,50 +1,50 @@
|
|||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import kea.vocab.Vocabulary;
|
||||
|
||||
import com.arsdigita.caching.CacheTable;
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class VocabularyCache {
|
||||
|
||||
public static Vocabulary getVocabulary(Domain domain, String language) throws IOException {
|
||||
String key = domain.getKey() + "_" + language;
|
||||
Vocabulary vocabulary = (Vocabulary) s_cache.get(key);
|
||||
if (vocabulary == null) {
|
||||
VocabularyBuilder builder = new VocabularyBuilder(domain, language);
|
||||
vocabulary = builder.build();
|
||||
s_cache.put(key, vocabulary);
|
||||
}
|
||||
return vocabulary;
|
||||
}
|
||||
|
||||
public static void reset() {
|
||||
s_cache.removeAll();
|
||||
}
|
||||
|
||||
private static final CacheTable s_cache = new CacheTable("VocabularyCache", false);
|
||||
}
|
||||
/*
|
||||
* Copyright (C) 2009 Permeance Technologies Pty Ltd. All Rights Reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this library; if not, write to the Free Software Foundation, Inc.,
|
||||
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
|
||||
package com.arsdigita.london.terms.indexing.kea;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import kea.vocab.Vocabulary;
|
||||
|
||||
import com.arsdigita.caching.CacheTable;
|
||||
import com.arsdigita.london.terms.Domain;
|
||||
|
||||
/**
|
||||
* @author <a href="https://sourceforge.net/users/terry_permeance/">terry_permeance</a>
|
||||
*/
|
||||
class VocabularyCache {
|
||||
|
||||
public static Vocabulary getVocabulary(Domain domain, String language) throws IOException {
|
||||
String key = domain.getKey() + "_" + language;
|
||||
Vocabulary vocabulary = (Vocabulary) s_cache.get(key);
|
||||
if (vocabulary == null) {
|
||||
VocabularyBuilder builder = new VocabularyBuilder(domain, language);
|
||||
vocabulary = builder.build();
|
||||
s_cache.put(key, vocabulary);
|
||||
}
|
||||
return vocabulary;
|
||||
}
|
||||
|
||||
public static void reset() {
|
||||
s_cache.removeAll();
|
||||
}
|
||||
|
||||
private static final CacheTable s_cache = new CacheTable("VocabularyCache", false);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ import com.arsdigita.domain.DataObjectNotFoundException;
|
|||
import com.arsdigita.london.terms.Domain;
|
||||
import com.arsdigita.london.terms.Term;
|
||||
import com.arsdigita.london.terms.Terms;
|
||||
import com.arsdigita.london.terms.Util;
|
||||
import com.arsdigita.london.util.ui.parameters.DomainObjectParameter;
|
||||
import com.arsdigita.util.UncheckedWrapperException;
|
||||
|
||||
|
|
@ -146,7 +147,7 @@ public class TermForm extends Form {
|
|||
|
||||
if (term == null) {
|
||||
Domain domain = (Domain)state.getValue(m_domain);
|
||||
m_uniqueid.setValue(state, null);
|
||||
m_uniqueid.setValue(state, Util.getNextTermID(domain));
|
||||
m_name.setValue(state, null);
|
||||
m_desc.setValue(state, null);
|
||||
m_shortcut.setValue(state, null);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -28,7 +28,7 @@ import com.arsdigita.tools.junit.framework.PackageTestSuite;
|
|||
*
|
||||
*
|
||||
* @author Joseph A. Bank (jbank@alum.mit.edu)
|
||||
* @version "$Id: TermsSuite.java 287 2005-02-22 00:29:02Z sskracic $
|
||||
* @version "$Id: TermsSuite.java 1963 2009-08-16 19:15:12Z pboy $
|
||||
**/
|
||||
public class TermsSuite extends PackageTestSuite {
|
||||
public TermsSuite() {
|
||||
|
|
|
|||
|
|
@ -42,13 +42,9 @@ import java.util.Iterator;
|
|||
* Recursively copies a domain object.
|
||||
*
|
||||
* @author Justin Ross <jross@redhat.com>
|
||||
* @version $Id: DomainObjectCopier.java 755 2005-09-02 13:42:47Z sskracic $
|
||||
* @version $Id: DomainObjectCopier.java 1942 2009-05-29 07:53:23Z terry $
|
||||
*/
|
||||
public class DomainObjectCopier extends DomainService {
|
||||
public static final String versionId =
|
||||
"$Id: DomainObjectCopier.java 755 2005-09-02 13:42:47Z sskracic $" +
|
||||
"$Author: sskracic $" +
|
||||
"$DateTime: 2004/03/01 09:31:36 $";
|
||||
|
||||
private static Logger s_log = Logger.getLogger(DomainObjectCopier.class);
|
||||
|
||||
|
|
|
|||
|
|
@ -31,13 +31,9 @@ import org.apache.log4j.Logger;
|
|||
* The CMS initializer.
|
||||
*
|
||||
* @author Justin Ross <jross@redhat.com>
|
||||
* @version $Id: Initializer.java 758 2005-09-02 14:26:56Z sskracic $
|
||||
* @version $Id: Initializer.java 1942 2009-05-29 07:53:23Z terry $
|
||||
*/
|
||||
public class Initializer extends CompoundInitializer {
|
||||
public final static String versionId =
|
||||
"$Id: Initializer.java 758 2005-09-02 14:26:56Z sskracic $" +
|
||||
"$Author: sskracic $" +
|
||||
"$DateTime: 2004/01/31 11:58:22 $";
|
||||
|
||||
private static final Logger s_log = Logger.getLogger
|
||||
(Initializer.class);
|
||||
|
|
|
|||
|
|
@ -27,13 +27,9 @@ import org.apache.log4j.Logger;
|
|||
* Loader.
|
||||
*
|
||||
* @author Justin Ross <jross@redhat.com>
|
||||
* @version $Id: Loader.java 287 2005-02-22 00:29:02Z sskracic $
|
||||
* @version $Id: Loader.java 1942 2009-05-29 07:53:23Z terry $
|
||||
*/
|
||||
public class Loader extends PackageLoader {
|
||||
public final static String versionId =
|
||||
"$Id: Loader.java 287 2005-02-22 00:29:02Z sskracic $" +
|
||||
"$Author: sskracic $" +
|
||||
"$DateTime: 2003/10/28 14:26:55 $";
|
||||
|
||||
private static final Logger s_log = Logger.getLogger(Loader.class);
|
||||
|
||||
|
|
|
|||
|
|
@ -26,17 +26,13 @@ import com.arsdigita.persistence.Filter;
|
|||
import com.arsdigita.persistence.OID;
|
||||
import com.arsdigita.persistence.SessionManager;
|
||||
import com.arsdigita.persistence.DataCollection;
|
||||
import com.arsdigita.workflow.simple.Workflow;
|
||||
import com.arsdigita.domain.DomainObjectFactory;
|
||||
|
||||
import com.arsdigita.cms.ContentPage;
|
||||
import com.arsdigita.cms.ContentItem;
|
||||
import com.arsdigita.cms.ContentSection;
|
||||
import com.arsdigita.cms.ContentTypeLifecycleDefinition;
|
||||
import com.arsdigita.cms.Folder;
|
||||
import com.arsdigita.cms.lifecycle.Lifecycle;
|
||||
import com.arsdigita.cms.lifecycle.LifecycleDefinition;
|
||||
import com.arsdigita.cms.lifecycle.Phase;
|
||||
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
import org.apache.commons.cli.OptionBuilder;
|
||||
|
|
@ -168,10 +164,6 @@ public class BulkPublish extends Program {
|
|||
}
|
||||
}.run();
|
||||
|
||||
final int expiryNotification = ContentSection.
|
||||
getConfig().getDefaultNotificationTime();
|
||||
|
||||
|
||||
final Iterator items = toPublish.iterator();
|
||||
while (items.hasNext()) {
|
||||
final OID oid = (OID) items.next();
|
||||
|
|
@ -201,31 +193,7 @@ public class BulkPublish extends Program {
|
|||
return;
|
||||
}
|
||||
|
||||
ContentItem pending = item.publish(def, new Date());
|
||||
final Lifecycle lifecycle = pending.getLifecycle();
|
||||
Date endDate = lifecycle.getEndDate();
|
||||
if (expiryNotification > 0) {
|
||||
|
||||
if (endDate != null) {
|
||||
|
||||
Date notificationDate = new Date(endDate.getTime() - (long)expiryNotification * 3600000L);
|
||||
|
||||
Phase expirationImminentPhase =
|
||||
lifecycle.addCustomPhase("expirationImminent",
|
||||
new Long(notificationDate.getTime()),
|
||||
new Long(endDate.getTime()));
|
||||
expirationImminentPhase.
|
||||
setListenerClassName("com.arsdigita.cms.lifecycle.NotifyLifecycleListener");
|
||||
expirationImminentPhase.save();
|
||||
}
|
||||
}
|
||||
if (ContentSection.getConfig().getDeleteWorkflowAfterPublication()) {
|
||||
Workflow workflow = Workflow.getObjectWorkflow(item);
|
||||
if (workflow != null) {
|
||||
workflow.delete();
|
||||
}
|
||||
}
|
||||
|
||||
item.publish(def, new Date());
|
||||
}
|
||||
};
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ package com.arsdigita.london.util.cmd;
|
|||
import com.arsdigita.london.util.Program;
|
||||
import com.arsdigita.london.util.Transaction;
|
||||
import com.arsdigita.persistence.CompoundFilter;
|
||||
import com.arsdigita.persistence.Filter;
|
||||
import com.arsdigita.persistence.Filter;
|
||||
import com.arsdigita.persistence.FilterFactory;
|
||||
import com.arsdigita.persistence.OID;
|
||||
import com.arsdigita.persistence.SessionManager;
|
||||
|
|
@ -30,7 +30,7 @@ import com.arsdigita.domain.DomainObjectFactory;
|
|||
|
||||
import com.arsdigita.cms.ContentPage;
|
||||
import com.arsdigita.cms.ContentItem;
|
||||
import com.arsdigita.cms.Folder;
|
||||
import com.arsdigita.cms.Folder;
|
||||
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
import org.apache.commons.cli.OptionBuilder;
|
||||
|
|
@ -45,10 +45,12 @@ public class BulkUnpublish extends Program {
|
|||
|
||||
private static final Logger s_log = Logger.getLogger(BulkUnpublish.class);
|
||||
|
||||
public BulkUnpublish() {
|
||||
super("Bulk Unpublish",
|
||||
"1.0.0",
|
||||
"");
|
||||
private int folderId;
|
||||
private String[] types;
|
||||
private boolean ignoreErrors;
|
||||
|
||||
public BulkUnpublish(String name, String version) {
|
||||
super(name, version, "");
|
||||
|
||||
Options options = getOptions();
|
||||
|
||||
|
|
@ -56,15 +58,15 @@ public class BulkUnpublish extends Program {
|
|||
OptionBuilder
|
||||
.hasArgs()
|
||||
.withLongOpt( "types" )
|
||||
.withDescription( "Restrict unpublishing to items of the specified content types" )
|
||||
.withDescription( "Restrict operation to items of the specified content types" )
|
||||
.create( "t" ) );
|
||||
options.addOption(
|
||||
OptionBuilder
|
||||
.hasArg()
|
||||
.withLongOpt( "restrictToFolderId" )
|
||||
.withDescription( "Restrict publishing to items within the folder with the specified id" )
|
||||
.create( "f" ) );
|
||||
|
||||
options.addOption(
|
||||
OptionBuilder
|
||||
.hasArg()
|
||||
.withLongOpt( "restrictToFolderId" )
|
||||
.withDescription( "Restrict operation to items within the folder with the specified id" )
|
||||
.create( "f" ) );
|
||||
|
||||
options.addOption
|
||||
(OptionBuilder
|
||||
.hasArg(false)
|
||||
|
|
@ -74,49 +76,57 @@ public class BulkUnpublish extends Program {
|
|||
}
|
||||
|
||||
protected void doRun(CommandLine cmdLine) {
|
||||
final int folderId;
|
||||
final String[] types;
|
||||
final boolean ignoreErrors = cmdLine.hasOption("i");
|
||||
this.ignoreErrors = cmdLine.hasOption("i");
|
||||
|
||||
if( cmdLine.hasOption( "t" ) ) {
|
||||
types = cmdLine.getOptionValues( "t" );
|
||||
this.types = cmdLine.getOptionValues( "t" );
|
||||
|
||||
System.out.println( "Unpublishing live items of types:" );
|
||||
for( int i = 0; i < types.length; i++ ) {
|
||||
System.out.println( types[i] );
|
||||
for( int i = 0; i < this.types.length; i++ ) {
|
||||
System.out.println( this.types[i] );
|
||||
}
|
||||
} else {
|
||||
types = null;
|
||||
this.types = null;
|
||||
System.out.println( "Unpublishing all live items" );
|
||||
}
|
||||
if (cmdLine.hasOption("f")) {
|
||||
folderId = Integer.parseInt(cmdLine.getOptionValue("f"));
|
||||
Folder folder = new Folder(new OID(Folder.BASE_DATA_OBJECT_TYPE, folderId));
|
||||
System.out.println( "Unpublishing items in folder: " + folder.getDisplayName());
|
||||
} else {
|
||||
folderId = -1;
|
||||
}
|
||||
if (cmdLine.hasOption("f")) {
|
||||
this.folderId = Integer.parseInt(cmdLine.getOptionValue("f"));
|
||||
Folder folder = new Folder(new OID(Folder.BASE_DATA_OBJECT_TYPE, this.folderId));
|
||||
System.out.println( "Unpublishing items in folder: " + folder.getDisplayName());
|
||||
} else {
|
||||
this.folderId = -1;
|
||||
}
|
||||
|
||||
final List toProcess = getListToProcess(true);
|
||||
unpublish(toProcess);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
new BulkUnpublish("Bulk Unpublish","1.0.0").run(args);
|
||||
}
|
||||
|
||||
protected List getListToProcess(boolean liveOnly) {
|
||||
final List toProcess = new ArrayList();
|
||||
|
||||
final List toUnpublish = new ArrayList();
|
||||
new Transaction() {
|
||||
public void doRun() {
|
||||
DataCollection items = SessionManager.getSession()
|
||||
.retrieve(ContentPage.BASE_DATA_OBJECT_TYPE);
|
||||
items.addNotEqualsFilter("type.id", null);
|
||||
items.addEqualsFilter("version", ContentItem.LIVE);
|
||||
if(liveOnly) items.addEqualsFilter("version", ContentItem.LIVE);
|
||||
items.addOrder("title");
|
||||
|
||||
FilterFactory filterFactory = items.getFilterFactory();
|
||||
|
||||
if (folderId >= 0) {
|
||||
Filter filter = filterFactory.simple(" ancestors like '%/" + folderId + "/%'");
|
||||
items.addFilter(filter);
|
||||
}
|
||||
if( null != types ) {
|
||||
CompoundFilter or = filterFactory.or();
|
||||
FilterFactory filterFactory = items.getFilterFactory();
|
||||
|
||||
for( int i = 0; i < types.length; i++ ) {
|
||||
or.addFilter( filterFactory.equals( "objectType", types[i] ) );
|
||||
if (this.folderId >= 0) {
|
||||
Filter filter = filterFactory.simple(" ancestors like '%/" + this.folderId + "/%'");
|
||||
items.addFilter(filter);
|
||||
}
|
||||
if( null != this.types ) {
|
||||
CompoundFilter or = filterFactory.or();
|
||||
|
||||
for( int i = 0; i < this.types.length; i++ ) {
|
||||
or.addFilter( filterFactory.equals( "objectType", this.types[i] ) );
|
||||
}
|
||||
|
||||
items.addFilter( or );
|
||||
|
|
@ -124,14 +134,23 @@ public class BulkUnpublish extends Program {
|
|||
|
||||
while (items.next()) {
|
||||
ContentPage page = (ContentPage) DomainObjectFactory.newInstance(items.getDataObject());
|
||||
toUnpublish.add(page.getDraftVersion().getOID());
|
||||
toProcess.add(page.getDraftVersion().getOID());
|
||||
}
|
||||
}
|
||||
}.run();
|
||||
|
||||
final Iterator items = toUnpublish.iterator();
|
||||
return toProcess;
|
||||
}
|
||||
|
||||
protected void unpublish(List toProcess) {
|
||||
final Iterator items = toProcess.iterator();
|
||||
while (items.hasNext()) {
|
||||
final OID oid = (OID) items.next();
|
||||
unpublish(oid);
|
||||
}
|
||||
}
|
||||
|
||||
protected void unpublish(OID oid) {
|
||||
Transaction txn = new Transaction() {
|
||||
public void doRun() {
|
||||
ContentPage item = (ContentPage)
|
||||
|
|
@ -146,15 +165,9 @@ public class BulkUnpublish extends Program {
|
|||
txn.run();
|
||||
} catch (Throwable ex) {
|
||||
s_log.error("Cannot unpublish " + oid, ex);
|
||||
if (!ignoreErrors) {
|
||||
if (!this.ignoreErrors) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
new BulkUnpublish().run(args);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,11 +28,11 @@ import javax.servlet.http.HttpServletRequest;
|
|||
* Bebop URLParameter is useless because it merely
|
||||
* checks the URL form; it doesn't actually return a
|
||||
* java.net.URL object.
|
||||
*
|
||||
* @version $Id: URLParameter.java 755 2005-09-02 13:42:47Z sskracic $
|
||||
*/
|
||||
public class URLParameter extends ParameterModel {
|
||||
|
||||
public static final String versionId = "$Id: URLParameter.java 755 2005-09-02 13:42:47Z sskracic $ by $Author: sskracic $, $DateTime: 2004/05/10 14:49:43 $";
|
||||
|
||||
public URLParameter(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue