<!DOCTYPE html>

<html lang="en">
<head>
  <title>spelling.js</title>
  <meta http-equiv="content-type" content="text/html; charset=UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <link rel="stylesheet" media="all" href="docco.css" />
</head>
<body>
  <div id="container">
    <div id="background"></div>
    
    <ul class="sections">
        
          <li id="title">
              <div class="annotation">
                  <h1>spelling.js</h1>
              </div>
          </li>
        
        
        
        <li id="section-1">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-1">&#182;</a>
              </div>
              
            </div>
            
            <div class="content"><div class='highlight'><pre><span class="hljs-keyword">var</span> fs = <span class="hljs-built_in">require</span>(<span class="hljs-string">'fs'</span>);
<span class="hljs-keyword">var</span> split = <span class="hljs-built_in">require</span>(<span class="hljs-string">'split'</span>);
<span class="hljs-keyword">var</span> wuzzy = <span class="hljs-built_in">require</span>(<span class="hljs-string">'wuzzy'</span>);

<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">train</span> <span class="hljs-params">(corrects, corpus, done)</span> {</span>
    fs.readFile(corpus, <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(err, corpStr)</span> {</span>
        <span class="hljs-keyword">if</span> (err) {
            done(err);
        } <span class="hljs-keyword">else</span> {</pre></div></div>
            
        </li>
        
        
        <li id="section-2">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-2">&#182;</a>
              </div>
              <p>provide an abstraction around the corpus of 
words </p>

            </div>
            
            <div class="content"><div class='highlight'><pre>            <span class="hljs-keyword">var</span> Trained = <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(freqs)</span> {</span>
                <span class="hljs-keyword">this</span>.freqs = freqs;
            };</pre></div></div>
            
        </li>
        
        
        <li id="section-3">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-3">&#182;</a>
              </div>
              <p>provide access to the frequency counts of words in the corpus. 
return a minimum of 1 to provide some handling of words that do 
not exist in the word corpus.</p>

            </div>
            
            <div class="content"><div class='highlight'><pre>            Trained.prototype.freq = <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(w)</span> {</span>
                <span class="hljs-keyword">return</span> (<span class="hljs-keyword">this</span>.freqs[w]
                    ? <span class="hljs-keyword">this</span>.freqs[w]
                    : <span class="hljs-number">1</span>
                );
            };</pre></div></div>
            
        </li>
        
        
        <li id="section-4">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-4">&#182;</a>
              </div>
              <p>get the set of recognized words from the corpus</p>

            </div>
            
            <div class="content"><div class='highlight'><pre>            Trained.prototype.words = <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(w)</span> {</span>
                <span class="hljs-keyword">return</span> <span class="hljs-built_in">Object</span>.keys(<span class="hljs-keyword">this</span>.freqs);
            };</pre></div></div>
            
        </li>
        
        
        <li id="section-5">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-5">&#182;</a>
              </div>
              <p>build up frequency counts of words in the training corpus</p>

            </div>
            
            <div class="content"><div class='highlight'><pre>            <span class="hljs-keyword">var</span> f = {};
            corpStr.toString().toLowerCase().match(<span class="hljs-regexp">/[a-z]+/g</span>).forEach(<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(w)</span> {</span></pre></div></div>
            
        </li>
        
        
        <li id="section-6">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-6">&#182;</a>
              </div>
              <p>limit words to those that exist in the English dictionary</p>

            </div>
            
            <div class="content"><div class='highlight'><pre>                <span class="hljs-keyword">if</span> (corrects[w]) {
                    f[w] = (f[w] 
                        ? f[w] + <span class="hljs-number">1</span>
                        : <span class="hljs-number">1</span>
                    );
                }
            });

            done(<span class="hljs-literal">null</span>, <span class="hljs-keyword">new</span> Trained(f)); 
        }
    });
};

<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">readCorrects</span> <span class="hljs-params">(correctsFile, done)</span> {</span>
    <span class="hljs-keyword">var</span> words = {};</pre></div></div>
            
        </li>
        
        
        <li id="section-7">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-7">&#182;</a>
              </div>
              <p>read in the set of correct English words</p>

            </div>
            
            <div class="content"><div class='highlight'><pre>    fs.createReadStream(
        correctsFile
    ).pipe(
        split()
    ).on(<span class="hljs-string">'data'</span>, <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(word)</span> {</span>
        words[word.toLowerCase().trim()] = <span class="hljs-number">1</span>;
    }).on(<span class="hljs-string">'end'</span>, <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">()</span> {</span>
        done(<span class="hljs-literal">null</span>, words);
    }).on(<span class="hljs-string">'error'</span>, <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(err)</span> {</span>
        done(err);
    });
};

<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">getChecker</span> <span class="hljs-params">(mdl)</span> {</span>
    <span class="hljs-keyword">var</span> SpellChecker = <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(model)</span> {</span>
        <span class="hljs-keyword">this</span>.model = model;
    };

    SpellChecker.prototype.check = <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(word)</span> {</span>
        <span class="hljs-keyword">return</span> <span class="hljs-keyword">this</span>._corrections(word.toLowerCase());
    };

    SpellChecker.prototype._corrections = <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(w1)</span> {</span></pre></div></div>
            
        </li>
        
        
        <li id="section-8">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-8">&#182;</a>
              </div>
              <p>go over each word in the corpus and collect edit 
distance (using Levenshtein distance) and frequency 
counts for each word </p>

            </div>
            
            <div class="content"><div class='highlight'><pre>        <span class="hljs-keyword">return</span> <span class="hljs-keyword">this</span>.model.words().map(<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(w2)</span> {</span>
            <span class="hljs-keyword">return</span> {
                w: w2,
                r: wuzzy.levenshtein(w1, w2),
                f: <span class="hljs-keyword">this</span>.model.freq(w2)
            };
        }, <span class="hljs-keyword">this</span>).sort(<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(a, b)</span> {</span></pre></div></div>
            
        </li>
        
        
        <li id="section-9">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-9">&#182;</a>
              </div>
              <p>sort on edit distance</p>

            </div>
            
            <div class="content"><div class='highlight'><pre>            <span class="hljs-keyword">return</span> (b.r - a.r);
        }).slice(<span class="hljs-number">0</span>, <span class="hljs-number">5</span>).sort(<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(a, b)</span> {</span></pre></div></div>
            
        </li>
        
        
        <li id="section-10">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-10">&#182;</a>
              </div>
              <p>sort the top 5 results on edit distance 
weighted by frequency of appearance in the 
corpus</p>

            </div>
            
            <div class="content"><div class='highlight'><pre>            <span class="hljs-keyword">return</span> ((b.f * b.r) - (a.f * b.r));</pre></div></div>
            
        </li>
        
        
        <li id="section-11">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-11">&#182;</a>
              </div>
              <p>return the top result</p>

            </div>
            
            <div class="content"><div class='highlight'><pre>        })[<span class="hljs-number">0</span>].w;
    };

    <span class="hljs-keyword">return</span> <span class="hljs-keyword">new</span> SpellChecker(mdl);
};

module.exports = <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(correctsFile, trainCorpus, done)</span> {</span></pre></div></div>
            
        </li>
        
        
        <li id="section-12">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-12">&#182;</a>
              </div>
              <p>read in the dictionary of English words</p>

            </div>
            
            <div class="content"><div class='highlight'><pre>    readCorrects(correctsFile, <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(err, corrects)</span> {</span>
        <span class="hljs-keyword">if</span> (err) {
            done(err);
        } <span class="hljs-keyword">else</span> {</pre></div></div>
            
        </li>
        
        
        <li id="section-13">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-13">&#182;</a>
              </div>
              <p>train the spell checker using the provided corpus of 
text</p>

            </div>
            
            <div class="content"><div class='highlight'><pre>            train(corrects, trainCorpus, <span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-params">(err, model)</span> {</span>
                <span class="hljs-keyword">if</span> (err) {
                    done(err);
                } <span class="hljs-keyword">else</span> {
                    done(<span class="hljs-literal">null</span>, getChecker(model));
                }
            });
        }
    });
};</pre></div></div>
            
        </li>
        
    </ul>
  </div>
</body>
</html>