<?xml version="1.0" encoding="UTF-8"?><api:function-page xml:base="/apidoc/8.0/cts.cluster.xml" generated="2015-10-07T16:36:00.016766-07:00" mode="javascript" xmlns:api="http://marklogic.com/rundmc/api"><api:function-name>cts.cluster</api:function-name><api:suggest>cts.cluster</api:suggest><api:suggest>cts</api:suggest><api:suggest>cluster</api:suggest><api:function-link mode="xquery" fullname="cts:cluster">/apidoc/8.0/cts:cluster.xml</api:function-link><api:function mode="javascript" name="cluster" type="builtin" lib="cts" category="SearchBuiltins" subcategory="Search Clustering" hidden="false" bucket="MarkLogic Built-In Functions" prefix="cts" namespace="http://marklogic.com/cts" fullname="cts.cluster"><api:summary>
  Produces a set of clusters from 
  <span class="javascript" xmlns="http://www.w3.org/1999/xhtml">an array</span> of nodes.  The nodes can be
  any set of nodes, and are typically the result of a
  
  <span class="javascript" xmlns="http://www.w3.org/1999/xhtml"><code>cts.search</code></span>
  operation.
</api:summary><api:params><api:param class="javascript" name="nodes" type="Array"><api:param-description>
    The array of nodes to cluster.
  </api:param-description><api:param-name>nodes</api:param-name><api:param-type>Array</api:param-type></api:param><api:param name="options" type="(element()|map:map)?" optional="true"><api:param-description>
    An 
    <span class="javascript" xmlns="http://www.w3.org/1999/xhtml">object</span> representation of the options 
    for defining the clustering parameters.
    
    The following is a sample options 
    <span class="javascript" xmlns="http://www.w3.org/1999/xhtml">object:</span>
    
    <pre xml:space="preserve" class="javascript" xmlns="http://www.w3.org/1999/xhtml"><br/>
    {
      labelMaxTerms: 4,
      maxClusters: 6,
      useDbConfig: true
    }
    </pre>

    <!-- Not documented on purpose:
         initialization, k-threshold, score -->
    <p xmlns="http://www.w3.org/1999/xhtml">The 
    <span class="javascript"><code>cts.cluster</code></span> options include:</p>
    <blockquote xmlns="http://www.w3.org/1999/xhtml">
    <dl>
    <dt><p>
    <span class="javascript"><code>hierarchicalLevels</code></span></p></dt>
    <dd>An integer specifying how many hierarchical cluster levels the clusterer
    should return. The default is <code>1</code>, which means no hierarchical
    clusters are returned.</dd>
    <dt><p>
    <span class="javascript"><code>labelMaxTerms</code></span></p></dt>
    <dd>An integer specifying the maximum number of terms to use in constructing
    a cluster label.  The default is <code>3</code>.</dd>
    <dt><p>
    <span class="javascript"><code>labelIgnoreWords</code></span></p></dt>
    <dd>
    <span class="javascript">A single word or an array of words</span> 
    that are to be excluded from the cluster label.  The default is to
    not exclude any words.</dd>
    <dt><p>
    <span class="javascript"><code>labelIgnoreAttributes</code></span></p></dt>
    <dd>A boolean that indicates whether attribute terms should be excluded
    from the cluster label.  The default is to include terms from attributes.</dd>
    <dt><p>
    <span class="javascript"><code>details</code></span></p></dt>
    <dd>A boolean that indicates whether additional details on the terms
    used in label generation are to be included in the output.  See the
    documentation on <code>cts.distinctiveTerms</code> for details on the format of the
    terms returned.  The default is <code>false</code>, meaning no such details
    are given.</dd>
    <dt><p>
    <span class="javascript"><code>minClusters</code></span></p></dt>
    <dd>An integer specifying a minimum number of desired clusters returned
    (at any hierarchical level).
    However, if no satisfactory clustering can be produced at a given level,
    only one cluster will be returned, regardless of this setting.
    The default is <code>3</code>.
    </dd>
    <dt><p>
    <span class="javascript"><code>maxClusters</code></span></p></dt>
    <dd>An integer specifying a maximum number of clusters that can be returned
    (at any hierarchical level). The default is <code>15</code>.
    </dd>
    <dt><p>
    <span class="javascript"><code>overlapping</code></span></p></dt>
    <dd>A boolean indicating whether it is acceptable for nodes to be
    assigned to more than one cluster.  The default is <code>false</code>.
    </dd>
    <dt><p>
    <span class="javascript"><code>maxTerms</code></span></p></dt>
    <dd>An integer value specifying the maximum number of distinct terms to
    use in calculating the cluster. The default is <code>200</code>.
    Increasing the value will increase the cost (in terms of both time
    and memory) of calculating the clusters, but may improve the quality
    of the clusters.</dd>
    <dt><p>
    <span class="javascript"><code>algorithm</code></span></p></dt>
    <dd>A value indicating which clustering algorithm to use, either
    <code>k-means</code> or <code>lsi</code>. The default is
    <code>k-means</code>.  The LSI algorithm is significantly more expensive
    to compute, both in terms of time and space.
    </dd>
    <dt><p>
    <span class="javascript"><code>numTries</code></span></p></dt>
    <dd>Specifies the number of times to run the clusterer against
    the specified data. The default is <code>1</code>.
    Because of the way the algorithms work, running
    the clusterer multiple times will increase the number of terms, and
    tends to improve the accuracy of the clusters. It does so at the
    cost of performance, as each time it runs, it has to do more work.
    </dd>
    <dt><p>
    <span class="javascript"><code>useDbConfig</code></span></p></dt>
    <dd>A boolean value indicating whether to use the current database configuration
    for determining which terms to use.  The default is <code>false</code>,
    which means that the default set of options, as well as any indexing
    options you specify in the options object, will be used for calculating 
    the clusters and their labels. When set to
    <code>true</code>, any indexing options set in the context database
    configuration (including any field settings) are used, as well as any
    default settings that you have not explicitly turned off in the options
    object.
    </dd>
    </dl>
    </blockquote>
    <p xmlns="http://www.w3.org/1999/xhtml">The options 
    <span class="javascript">object</span> also includes indexing
    options in the <code>http://marklogic.com/xdmp/database</code>
    namespace.
    These control which terms to use. Note that the use of certain
    options, such as 
    <span class="javascript"><code>fastCaseSensitiveSearches</code></span>, will not
    impact final results unless the term vector size is limited with
    the 
    <span class="javascript"><code>maxTerms</code></span> option.  
    Other options, such as
    
    <span class="javascript"><code>phraseThroughs</code></span>, will only 
    generate terms if some other option is also enabled (in this case
    
    <span class="javascript"><code>fastPhraseSearches</code></span>). </p>
    <p xmlns="http://www.w3.org/1999/xhtml">The database options are the same as the database options shown for
    <a href="/cts:distinctive-terms#db-term-options">
    
    <span class="javascript"><code>cts.distinctiveTerms</code></span></a>.
    </p>

  </api:param-description><api:param-name>options</api:param-name><api:param-type>Object?</api:param-type></api:param></api:params><api:return class="javascript">Object</api:return><api:example class="javascript"><a id="clusterEx1" xmlns="http://www.w3.org/1999/xhtml"></a><pre xml:space="preserve" xmlns="http://www.w3.org/1999/xhtml">

cts.cluster(
    cts.search(cts.wordQuery("steroids")).toArray(),
    {
        algorithm: "lsi",
        hierarchicalLevels: 3,
        minClusters: 2,
        maxClusters: 12,
        overlapping: false,
        labelIgnoreWords: ["of", "the", "on", "in", "at", "a", "an", "for", "from", "by", "and"],
        stemmedSearches: "advanced",
        fastPhraseSearches: true,
        fastElementWordSearches:true,
        fastElementPhraseSearches:true
    }
);
=&gt;
{
  "clusters":[
    {
      "id":"4904706095739760677",
      "label":"neonate, cortisol, fetal",
      "nodes":[3,4,7,9,14]
    },
    {
      "id":"741204961292539384",
      "label":"fetal, cortisol, being",
      "nodes":[8,15]
    },
    {
      "id":"9998437716377655230",
      "label":"locus, male, fetal",
      "nodes":[6]
    },
      ...
    {
      "id":"7956765932334497548",
      "parentId":"14551791662219883254",
      "label":"normal, endometrium, also",
      "nodes":[17]
    },
    {
      "id":"4427100138446341770",
      "parentId":"14551791662219883254",
      "label":"km, administration, do",
      "nodes":[12]
    }
  ],
  "options":{
    "algorithm":"lsi",
    "language":"en",
    "stemmedSearches":"advanced",
    "fastElementPhraseSearches":true,
    "fastElementWordSearches":true,
    "maxClusters":12,
    "minClusters":2,
    "hierarchicalLevels":3,
    "maxTerms":200,
    "labelMaxTerms":3,
    "labelIgnoreWords":[
      "a","an","and","at","by","for","from","in","of","on","the"],
    "labelIgnoreAttributes":false,
    "numTries":1,
    "score":"logtfidf",
    "useDbConfig":false,
    "details":false,
    "overlapping":false
  }
}


</pre></api:example></api:function></api:function-page>