import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
// Sample HTML document used as splitter input. It deliberately contains a
// <head> with <title>/<style> and a <body> with nested <div>s so that the
// HTML-aware separators have real tag boundaries to split on.
const text = `<!DOCTYPE html>
<html>
  <head>
    <title>🦜️🔗 LangChain</title>
    <style>
      body {
        font-family: Arial, sans-serif;
      }
      h1 {
        color: darkblue;
      }
    </style>
  </head>
  <body>
    <div>
      <h1>🦜️🔗 LangChain</h1>
      <p>⚡ Building applications with LLMs through composability ⚡</p>
    </div>
    <div>
      As an open source project in a rapidly developing field, we are extremely open to contributions.
    </div>
  </body>
</html>`;

// Build a splitter preconfigured for the "html" language, with a chunk size
// of 175 and an overlap of 20 between neighbouring chunks.
const splitter = RecursiveCharacterTextSplitter.fromLanguage("html", {
  chunkSize: 175,
  chunkOverlap: 20,
});

// createDocuments takes an array of input strings; here it is given the
// single sample document above.
const output = await splitter.createDocuments([text]);

console.log(output);

/*
  Expected output (metadata abbreviated by console.log):

  [
    Document {
      pageContent: '<!DOCTYPE html>\n<html>',
      metadata: { loc: [Object] }
    },
    Document {
      pageContent: '<head>\n    <title>🦜️🔗 LangChain</title>',
      metadata: { loc: [Object] }
    },
    Document {
      pageContent: '<style>\n' +
        '      body {\n' +
        '        font-family: Arial, sans-serif;\n' +
        '      }\n' +
        '      h1 {\n' +
        '        color: darkblue;\n' +
        '      }\n' +
        '    </style>\n' +
        '  </head>',
      metadata: { loc: [Object] }
    },
    Document {
      pageContent: '<body>\n' +
        '    <div>\n' +
        '      <h1>🦜️🔗 LangChain</h1>\n' +
        '      <p>⚡ Building applications with LLMs through composability ⚡</p>\n' +
        '    </div>',
      metadata: { loc: [Object] }
    },
    Document {
      pageContent: '<div>\n' +
        '      As an open source project in a rapidly developing field, we are extremely open to contributions.\n' +
        '    </div>\n' +
        '  </body>\n' +
        '</html>',
      metadata: { loc: [Object] }
    }
  ]
*/