"""Build, persist, and load a FAISS index over overlapping text chunks
embedded with sentence-transformers."""
import os
import pickle
from typing import List

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

DATA_PATH = os.path.join(os.path.dirname(__file__), '../data/database.txt')
INDEX_PATH = os.path.join(os.path.dirname(__file__), 'faiss.index')
META_PATH = os.path.join(os.path.dirname(__file__), 'faiss_meta.pkl')

CHUNK_SIZE = 600     # characters per chunk
CHUNK_OVERLAP = 100  # characters shared between consecutive chunks

def chunk_text(text: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> List[str]:
    """Split text into overlapping chunks of at most chunk_size characters."""
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    chunks = []
    start = 0
    while start < len(text):
        end = min(start + chunk_size, len(text))
        chunks.append(text[start:end])
        if end == len(text):
            break  # avoid a trailing fragment already contained in the previous chunk
        start += chunk_size - overlap
    return chunks

def embed_texts(texts: List[str]) -> np.ndarray:
    """Encode texts into dense vectors; returns a float32 array of shape (n, dim)."""
    # Note: the model is reloaded on every call; cache it at module level if this becomes a hot path.
    model = SentenceTransformer("all-MiniLM-L6-v2")
    return model.encode(texts, show_progress_bar=True, convert_to_numpy=True).astype("float32")

def build_and_save_index():
    """Read the source document, chunk it, embed the chunks, and persist the FAISS index plus chunk metadata."""
    with open(DATA_PATH, 'r', encoding='utf-8') as f:
        text = f.read()
    chunks = chunk_text(text)
    embeddings = embed_texts(chunks)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)  # exact L2 search; fine for small corpora
    index.add(embeddings)
    faiss.write_index(index, INDEX_PATH)
    with open(META_PATH, 'wb') as f:
        pickle.dump(chunks, f)  # chunk texts, keyed by their position in the index

def load_index_and_meta():
    """Load the persisted FAISS index and the parallel list of chunk texts."""
    index = faiss.read_index(INDEX_PATH)
    with open(META_PATH, 'rb') as f:
        chunks = pickle.load(f)
    return index, chunks
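
# Retrieval helper: a minimal sketch of how the index built above could be queried.
# The name `search` and the default top_k are assumptions; they are not part of the original module.
def search(query: str, top_k: int = 5) -> List[str]:
    """Embed the query and return the top_k nearest chunks by L2 distance."""
    index, chunks = load_index_and_meta()
    query_vec = embed_texts([query])
    _, indices = index.search(query_vec, top_k)
    return [chunks[i] for i in indices[0] if i != -1]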

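# Usage sketch (assumed entry point, not in the original): rebuild the index if it is missing,
# then run an illustrative query against it.
if __name__ == "__main__":
    if not os.path.exists(INDEX_PATH):
        build_and_save_index()
    for hit in search("example query"):
        print(hit[:120], "...")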