Reference links:
https://github.com/carrot2/carrot2/blob/master/applications/carrot2-examples/examples/org/carrot2/examples/clustering/ClusteringDocumentList.java
http://www.programcreek.com/java-api-examples/index.php?api=org.carrot2.clustering.lingo.LingoClusteringAlgorithm
https://github.com/carrot2/carrot2/blob/master/applications/carrot2-examples/examples/org/carrot2/examples/clustering/ClusteringDocumentList.java
http://www.programcreek.com/java-api-examples/index.php?api=org.carrot2.clustering.lingo.LingoClusteringAlgorithm
| /* | |
| * Carrot2 project. | |
| * | |
| * Copyright (C) 2002-2015, Dawid Weiss, Stanisław Osiński. | |
| * All rights reserved. | |
| * | |
| * Refer to the full license file "carrot2.LICENSE" | |
| * in the root folder of the repository checkout or at: | |
| * http://www.carrot2.org/carrot2.LICENSE | |
| */ | |
| package org.carrot2.examples; | |
| import java.text.NumberFormat; | |
| import java.util.Collection; | |
| import java.util.Map; | |
| import org.apache.commons.lang.StringUtils; | |
| import org.carrot2.core.Cluster; | |
| import org.carrot2.core.Document; | |
| import org.carrot2.core.ProcessingResult; | |
| import org.carrot2.core.attribute.CommonAttributesDescriptor; | |
| /** | |
| * Simple console formatter for dumping {@link ProcessingResult}. | |
| */ | |
| public class ConsoleFormatter | |
| { | |
| public static void displayResults(ProcessingResult processingResult) | |
| { | |
| final Collection<Document> documents = processingResult.getDocuments(); | |
| final Collection<Cluster> clusters = processingResult.getClusters(); | |
| final Map<String, Object> attributes = processingResult.getAttributes(); | |
| // Show documents | |
| if (documents != null) | |
| { | |
| displayDocuments(documents); | |
| } | |
| // Show clusters | |
| if (clusters != null) | |
| { | |
| displayClusters(clusters); | |
| } | |
| // Show attributes other attributes | |
| displayAttributes(attributes); | |
| } | |
| public static void displayDocuments(final Collection<Document> documents) | |
| { | |
| System.out.println("Collected " + documents.size() + " documents\n"); | |
| for (final Document document : documents) | |
| { | |
| displayDocument(0, document); | |
| } | |
| } | |
| public static void displayAttributes(final Map<String, Object> attributes) | |
| { | |
| System.out.println("Attributes:"); | |
| String DOCUMENTS_ATTRIBUTE = CommonAttributesDescriptor.Keys.DOCUMENTS; | |
| String CLUSTERS_ATTRIBUTE = CommonAttributesDescriptor.Keys.CLUSTERS; | |
| for (final Map.Entry<String, Object> attribute : attributes.entrySet()) | |
| { | |
| if (!DOCUMENTS_ATTRIBUTE.equals(attribute.getKey()) | |
| && !CLUSTERS_ATTRIBUTE.equals(attribute.getKey())) | |
| { | |
| System.out.println(attribute.getKey() + ": " + attribute.getValue()); | |
| } | |
| } | |
| } | |
| public static void displayClusters(final Collection<Cluster> clusters) | |
| { | |
| displayClusters(clusters, Integer.MAX_VALUE); | |
| } | |
| public static void displayClusters(final Collection<Cluster> clusters, | |
| int maxNumberOfDocumentsToShow) | |
| { | |
| displayClusters(clusters, maxNumberOfDocumentsToShow, | |
| ClusterDetailsFormatter.INSTANCE); | |
| } | |
| public static void displayClusters(final Collection<Cluster> clusters, | |
| int maxNumberOfDocumentsToShow, ClusterDetailsFormatter clusterDetailsFormatter) | |
| { | |
| System.out.println("\n\nCreated " + clusters.size() + " clusters\n"); | |
| int clusterNumber = 1; | |
| for (final Cluster cluster : clusters) | |
| { | |
| displayCluster(0, "" + clusterNumber++, cluster, maxNumberOfDocumentsToShow, | |
| clusterDetailsFormatter); | |
| } | |
| } | |
| private static void displayDocument(final int level, Document document) | |
| { | |
| final String indent = getIndent(level); | |
| System.out.printf(indent + "[%2s] ", document.getStringId()); | |
| System.out.println(document.getField(Document.TITLE)); | |
| final String url = document.getField(Document.CONTENT_URL); | |
| if (StringUtils.isNotBlank(url)) | |
| { | |
| System.out.println(indent + " " + url); | |
| } | |
| System.out.println(); | |
| } | |
| private static void displayCluster(final int level, String tag, Cluster cluster, | |
| int maxNumberOfDocumentsToShow, ClusterDetailsFormatter clusterDetailsFormatter) | |
| { | |
| final String label = cluster.getLabel(); | |
| // indent up to level and display this cluster's description phrase | |
| for (int i = 0; i < level; i++) | |
| { | |
| System.out.print(" "); | |
| } | |
| System.out.println(label + " " | |
| + clusterDetailsFormatter.formatClusterDetails(cluster)); | |
| // if this cluster has documents, display three topmost documents. | |
| int documentsShown = 0; | |
| for (final Document document : cluster.getDocuments()) | |
| { | |
| if (documentsShown >= maxNumberOfDocumentsToShow) | |
| { | |
| break; | |
| } | |
| displayDocument(level + 1, document); | |
| documentsShown++; | |
| } | |
| if (maxNumberOfDocumentsToShow > 0 | |
| && (cluster.getDocuments().size() > documentsShown)) | |
| { | |
| System.out.println(getIndent(level + 1) + "... and " | |
| + (cluster.getDocuments().size() - documentsShown) + " more\n"); | |
| } | |
| // finally, if this cluster has subclusters, descend into recursion. | |
| final int num = 1; | |
| for (final Cluster subcluster : cluster.getSubclusters()) | |
| { | |
| displayCluster(level + 1, tag + "." + num, subcluster, | |
| maxNumberOfDocumentsToShow, clusterDetailsFormatter); | |
| } | |
| } | |
| private static String getIndent(final int level) | |
| { | |
| final StringBuilder indent = new StringBuilder(); | |
| for (int i = 0; i < level; i++) | |
| { | |
| indent.append(" "); | |
| } | |
| return indent.toString(); | |
| } | |
| public static class ClusterDetailsFormatter | |
| { | |
| public final static ClusterDetailsFormatter INSTANCE = new ClusterDetailsFormatter(); | |
| protected NumberFormat numberFormat; | |
| public ClusterDetailsFormatter() | |
| { | |
| numberFormat = NumberFormat.getInstance(); | |
| numberFormat.setMaximumFractionDigits(2); | |
| } | |
| public String formatClusterDetails(Cluster cluster) | |
| { | |
| final Double score = cluster.getScore(); | |
| return "(" + cluster.getAllDocuments().size() + " docs" | |
| + (score != null ? ", score: " + numberFormat.format(score) : "") + ")"; | |
| } | |
| } | |
| } |
Comments
Post a Comment