This example shows how to use instructor models in the pgml SDK for more advanced use cases.
Python
from pgml import Collection, Model, Splitter, Pipeline
from datasets import load_dataset
from dotenv import load_dotenv
JavaScript
const pgml = require("pgml");
require("dotenv").config();
Python
collection = Collection("squad_collection_1")
JavaScript
const collection = pgml.newCollection("my_javascript_qai_collection");
Python
model = Model("hkunlp/instructor-base", parameters={
"instruction": "Represent the Wikipedia document for retrieval: "
})
pipeline = Pipeline("squad_instruction", model, Splitter())
await collection.add_pipeline(pipeline)
JavaScript
const model = pgml.newModel("hkunlp/instructor-base", "pgml", {
instruction: "Represent the Wikipedia document for retrieval: ",
});
const pipeline = pgml.newPipeline(
"my_javascript_qai_pipeline",
model,
pgml.newSplitter(),
);
await collection.add_pipeline(pipeline);
Python
data = load_dataset("squad")
documents = [
{"id": ..., "text": ...} for r in data
]
await collection.upsert_documents(documents)
JavaScript
const documents = [
{
id: "...",
text: "...",
},
];
await collection.upsert_documents(documents);
Python
results = await collection.query()
.vector_recall(query, pipeline, {
"instruction": "Represent the Wikipedia question for retrieving supporting documents: "
})
.fetch_all()
JavaScript
const queryResults = await collection
.query()
.vector_recall(query, pipeline, {
instruction:
"Represent the Wikipedia question for retrieving supporting documents: ",
})
.fetch_all();