import dataclasses

import numpy as np
from openai import OpenAI


def get_batch_embeddings(
    client: OpenAI, texts: list[str], model: str = "text-embedding-3-small"
) -> np.ndarray:
    """Embed several texts with a single embeddings request.

    Args:
        client: OpenAI client used to issue the request.
        texts: Strings to embed; passed unchanged as the request ``input``.
        model: Name of the embedding model to request.

    Returns:
        An array built from the ``embedding`` of every item in the response's
        ``data`` list, in the order the response lists them.
        NOTE(review): presumably shaped (len(texts), embedding_dim) with rows
        aligned to ``texts`` -- confirm against the API contract.
    """
    response = client.embeddings.create(input=texts, model=model)
    # Iterate over the returned items directly instead of indexing by position.
    return np.array([item.embedding for item in response.data])


def get_one_embedding(
    client: OpenAI, text: str, model: str = "text-embedding-3-small"
) -> np.ndarray:
    """Embed a single text.

    Args:
        client: OpenAI client used to issue the request.
        text: String to embed; sent as a one-element ``input`` list.
        model: Name of the embedding model to request.

    Returns:
        An array built from the ``embedding`` of the first item in the
        response's ``data`` list.
    """
    response = client.embeddings.create(input=[text], model=model)
    return np.array(response.data[0].embedding)


@dataclasses.dataclass
class Chunk:
    """A piece of text together with metadata about where it came from."""

    # NOTE(review): field meanings below are inferred from their names only;
    # confirm against the code that builds Chunk instances.
    text: str  # the chunk's text content
    title: str  # presumably the title of the source video
    video_idx: int  # presumably an index identifying the source video
    link: str  # presumably a URL pointing at the source


@dataclasses.dataclass
class Dataset:
    """A list of chunks paired with an array of embeddings."""

    chunks: list[Chunk]
    # NOTE(review): presumably one embedding row per chunk, in the same order;
    # nothing here enforces that -- verify where Dataset is constructed.
    embeddings: np.ndarray

    def __len__(self) -> int:
        """Return the number of chunks in the dataset."""
        return len(self.chunks)