Whatever message this page gives is out now! Go check it out!
Field | Description |
text | String body used for chunking and vector search. |
metadata | Struct: file name, URI, parser fields, custom tags. |
<cfscript>
// Load everything under ./docs and inspect the first document that comes back.
docService = documentService();
loadOptions = { path: expandPath("./docs/") };
documents = docService.load(loadOptions);

// Only touch the first element when load() returned a non-empty array
// (CFML arrays are 1-based, so index 1 is the first document).
hasDocuments = isArray(documents) && arrayLen(documents) > 0;
if (hasDocuments) {
    doc = documents[1];
    // Each document exposes a text body and a metadata struct.
    writeOutput("text length: " & len(doc.text));
    writeDump(doc.metadata);
}
</cfscript><cfscript>
// load() is called here with a plain string path instead of an options struct:
// once with a directory and once with a single file inside that directory.
docService = documentService();
docsDir = expandPath("docs");
pdfPath = docsDir & "/upgrade-adobe-plan.pdf";

dirDocuments = docService.load(docsDir);
fileDocuments = docService.load(pdfPath);

// Report how many documents each call produced.
dirCount = arrayLen(dirDocuments);
fileCount = arrayLen(fileDocuments);
writeOutput(dirCount & " from dir, " & fileCount & " from file");
</cfscript><cfscript>
// Load only the PDFs directly inside ./docs and tag each one with a category.
docService = documentService();
loadOptions = {
    path: expandPath("./docs/"),
    pattern: "*.pdf",                  // filename glob filter
    recursive: false,                  // top-level folder only
    metadata: { category: "test" }     // merged into each document's metadata
};
documents = docService.load(loadOptions);
writeDump(documents);
</cfscript>Option | Description |
path | Required. Absolute path to a file or a directory, or a URL, passed as a plain string. Use expandPath() to resolve relative paths. |
pattern | Glob filter applied to filenames (e.g. *.pdf). Only matching files are loaded. Omit to load all supported formats. |
recursive | When true, walks subdirectories. When false (default), processes only the top-level folder. |
metadata | A struct merged into each loaded document's metadata. Useful for tagging a batch with a category, source label, or version. |
parserType | Forces a specific parser regardless of file extension. Omit to use automatic format detection based on extension. |
parserConfigs | A struct keyed by format (e.g. json, pdf, csv) with format-specific parser settings such as JSON path selectors, PDF page ranges, or CSV row-per-document. |
maxFileSize | Maximum file size, in bytes, accepted for ingestion. When set to a non-zero value, files larger than this are rejected or skipped early. 0 means no limit. |
includePatterns | Array of URL or path patterns to allow. Applied when loading from URLs. |
excludePatterns | Array of URL or path patterns to block. Applied when loading from URLs. |
parallel | When true, loads files using multiple threads. Use with maxThreads to control concurrency. |
maxThreads | Maximum number of threads for parallel loading. Very large values may be capped (e.g. at 64). |
<cfscript>
// Load the same JSON file twice: once forcing the "text" parser and once
// letting the service pick a parser from the file extension.
docService = documentService();
districtJson = expandPath("./docs/district.json");

rawAsText = docService.load({ path: districtJson, parserType: "text" });
autoParsed = docService.load({ path: districtJson });

// Compare the two results: text length of the first forced-text document
// versus the number of documents produced by automatic detection.
forcedTextLength = len(rawAsText[1].text);
writeOutput("Forced text length: " & forcedTextLength);
writeOutput("<br>Auto-parsed doc count: " & arrayLen(autoParsed));
</cfscript><cfscript>
// Load a JSON file with JSON-specific parser settings.
docService = documentService();

// NOTE(review): the key meanings below are inferred from their names
// (selector for the records, field used as document text, fields copied
// into metadata) - confirm against the parser reference.
jsonSettings = {
    jsonPath: "$.articles[*]",
    contentKey: "body",
    metadataKeys: ["title", "author", "publishedAt"]
};

docs = docService.load({
    path: expandPath("./docs/rag.json"),
    parserConfigs: { json: jsonSettings }   // parserConfigs is keyed by format
});
writeDump(docs);
</cfscript><cfscript>
// Load a PDF with a PDF-specific "pages" setting and print the text length
// of the first document returned.
docService = documentService();
pdfOptions = {
    path: expandPath("./docs/upgrade-adobe-plan.pdf"),
    parserConfigs: { pdf: { pages: "1" } }
};
pageDocs = docService.load(pdfOptions);

// CFML arrays are 1-based: index 1 is the first document.
firstPageDoc = pageDocs[1];
writeOutput(len(firstPageDoc.text));
</cfscript><cfscript>
// Load a CSV file with the CSV "rowPerDocument" setting switched on and
// report how many documents come back.
docService = documentService();
csvSettings = { rowPerDocument: true };

// This example uses the asynchronous variant; the documents are read from
// the returned future with get().
future = docService.loadAsync({
    path: expandPath("./docs/age-when-completed-education.csv"),
    parserConfigs: { csv: csvSettings }
});
docs = future.get();
writeOutput("Documents: " & arrayLen(docs));
</cfscript><cfscript>
// Load a document from a URL, tuning the HTTP request through requestOptions.
docService = documentService();
docs = docService.load({
    path: "https://www.adobe.com/robots.txt",
    requestOptions: {
        connectionTimeout: 60000,   // ms to wait while establishing the connection
        readTimeout: 120000,        // ms to wait for data before timing out
        maxRetries: 2,              // retry attempts on transient failures
        userAgent: "ColdFusion-RAG-Test/1.0"
    }
});
writeOutput(arrayLen(docs) & " document(s)");

// A remote fetch can legitimately yield no documents, so only preview the
// first one when it exists (CFML arrays are 1-based).
if (arrayLen(docs) > 0) {
    // The text comes from a remote server: encode it before writing it into
    // the HTML response so markup in the payload is shown, not interpreted.
    writeOutput("<br>Preview: " & encodeForHTML(left(docs[1].text, 200)));
}
</cfscript>Field | Description |
connectionTimeout | Milliseconds to wait when establishing a connection before timing out. |
readTimeout | Milliseconds to wait for data to be received before timing out. |
maxRetries | Number of retry attempts on transient failures. |
userAgent | User-agent string sent with the HTTP request. |
<cfscript>
// Walk the PDFs in ./docs one document at a time using the lazy iterator
// rather than loading the whole result array up front.
docService = documentService();
docIterable = docService.lazyLoad({
    path: expandPath("./docs/"),
    pattern: "*.pdf"
});

count = 0;
// hasNext() reports whether documents remain; next() returns the next one
// and advances the iterator.
while (docIterable.hasNext()) {
    doc = docIterable.next();
    count += 1;
    writeOutput("Doc " & count & ": " & len(doc.text) & " chars<br>");
}
writeOutput("Total: " & count);
</cfscript>Method | Description |
lazyLoad(options) | Returns an iterable over all matched files. Does not load all files into memory at once. |
hasNext() | Returns true if more documents remain in the iterator. |
next() | Returns the next document struct (with text and metadata) and advances the iterator. |
<cfscript>
// Start a load with the asynchronous API, then collect its result.
docService = documentService();
asyncOptions = {
    path: expandPath("./docs/age-when-completed-education.csv"),
    parserConfigs: {
        csv: { rowPerDocument: true }
    }
};

// loadAsync() hands back a future instead of the document array.
future = docService.loadAsync(asyncOptions);

// get() yields the loaded documents.
// NOTE(review): presumably blocks until loading finishes - confirm.
docs = future.get();
writeOutput("Documents: " & arrayLen(docs));
</cfscript>Async method | Sync equivalent |
loadAsync(options) | load(options) |
transformAsync(docs, udf) | transform(docs, udf) |
transformSegmentsAsync(segs, udf) | transformSegments(segs, udf) |
ingestAsync(segs, store) | ingest(segs, store) |
<cfscript>
// Create two document services and run the same load through each of them.
service1 = documentService();
service2 = documentService();

docsDir = expandPath("./docs/");
txtPattern = "*.txt";

// Each call gets its own freshly built options struct, as in a standalone call.
docs1 = service1.load({ path: docsDir, pattern: txtPattern });
docs2 = service2.load({ path: docsDir, pattern: txtPattern });
</cfscript><cfscript>
// Load the .txt files from ./docs, then close the service.
docService = documentService();
try {
    documents = docService.load({
        path: expandPath("./docs/"),
        pattern: "*.txt"
    });
} finally {
    // Close in finally so the service is closed even when load() throws.
    docService.close();
}
</cfscript>