"use client";
 
import type { ReactNode } from "react";
import {
  AssistantRuntimeProvider,
  useLocalRuntime,
  type ChatModelAdapter,
  toCoreMessages,
  TextContentPart,
} from "@assistant-ui/react";
// import { streamText } from 'ai';
// import { createOpenAI } from "@ai-sdk/openai";
import { OpenAI } from "openai";

/**
 * Per-response measurements attached to an assistant message under
 * `metadata.custom` once streaming has finished.
 */
type CustomMetadata = {
  /** Token counts copied from the `usage` field of the streamed chunks. */
  usage: {
    /** Tokens consumed by the prompt (`prompt_tokens`). */
    promptTokens: number;
    /** Tokens produced by the model (`completion_tokens`). */
    completionTokens: number;
  };
  /**
   * Time to first token: difference of two `performance.now()` readings
   * (milliseconds), or `null` when it could not be measured.
   */
  ttft: number | null;
  /**
   * Completion tokens per second over the generation window, or `null` when
   * it could not be computed.
   */
  tps: number | null;
};
  
/**
 * Wraps `children` in an `AssistantRuntimeProvider` backed by a local runtime
 * whose model adapter streams chat completions from an OpenAI-compatible
 * endpoint.
 *
 * The adapter reads three fields from `chatData`:
 * - `defaultApiKey` — API key handed to the OpenAI client,
 * - `baseUrl` — base URL of the endpoint,
 * - `externalAlias` — model name sent with each request.
 *
 * While streaming, the adapter yields the accumulated assistant text after
 * every chunk. When the stream ends it yields one more result carrying
 * `metadata.custom` with token usage, time to first token (ms) and completion
 * tokens per second.
 *
 * Props:
 * - `children` — React subtree that gets access to the runtime.
 * - `chatData` — connection/model settings described above.
 */
export function RuntimeProvider({
  children,
  chatData,
}: Readonly<{
  children: ReactNode;
  // NOTE(review): left as `any` because callers are not visible from this
  // file; only `defaultApiKey`, `baseUrl` and `externalAlias` are read.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  chatData: any;
}>) {
  // `chatData` is deliberately not logged: it contains the API key.

  const modelAdapter: ChatModelAdapter = {
    async *run({ messages, abortSignal }) {
      const openai = new OpenAI({
        // Requests are issued directly from the browser, so the key below is
        // visible to the client; the SDK requires this explicit opt-in.
        dangerouslyAllowBrowser: true,
        apiKey: chatData.defaultApiKey,
        baseURL: chatData.baseUrl,
      });

      const isTextPart = (part: { type: string }): part is TextContentPart =>
        part.type === "text";

      // TODO: remove once Cerebras supports list for content
      // Collapse each message's content parts into one plain string. Every
      // text part is kept (joined with newlines); non-text parts are skipped
      // because they cannot be represented in a string-only payload.
      const requestMessages = toCoreMessages(messages).map((message) => {
        if (!Array.isArray(message.content)) return message;

        const texts: string[] = [];
        for (const part of message.content) {
          if (isTextPart(part)) texts.push(part.text);
        }
        return { ...message, content: texts.join("\n") };
      });

      const sendRequestTime = performance.now();

      const stream = await openai.chat.completions.create(
        {
          messages: requestMessages as OpenAI.ChatCompletionMessageParam[],
          model: chatData.externalAlias,
          stream: true,
          stream_options: {
            include_usage: true,
          },
        },
        // Forward cancellation so that aborting the run also closes the
        // underlying HTTP stream instead of letting it run to completion.
        { signal: abortSignal },
      );

      let firstTokenTime: number | undefined;
      const usage = {
        promptTokens: 0,
        completionTokens: 0,
      };
      let text = "";

      for await (const chunk of stream) {
        const delta = chunk.choices[0]?.delta?.content;
        if (delta) {
          // Stamp the first chunk that actually carries content; chunks
          // without content (e.g. role-only or usage-only) are not tokens.
          if (firstTokenTime === undefined) firstTokenTime = performance.now();
          text += delta;
        }

        yield {
          content: [{ type: "text", text }],
        };

        // Keep the latest counts from any chunk that carries a `usage` field.
        if (chunk.usage) {
          usage.promptTokens = chunk.usage.prompt_tokens ?? 0;
          usage.completionTokens = chunk.usage.completion_tokens ?? 0;
        }
      }

      const lastTokenTime = performance.now();
      const ttft =
        firstTokenTime === undefined ? null : firstTokenTime - sendRequestTime;
      const generationSeconds =
        firstTokenTime === undefined
          ? 0
          : (lastTokenTime - firstTokenTime) / 1000;
      // Guard the division: a zero-length window would give Infinity or NaN.
      const tps =
        generationSeconds > 0
          ? usage.completionTokens / generationSeconds
          : null;

      // NOTE(review): the `any` cast is retained from the original code;
      // presumably the installed result type rejects a metadata-only yield —
      // confirm against the installed @assistant-ui/react version.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      yield {
        metadata: {
          custom: {
            usage,
            ttft,
            tps,
          } satisfies CustomMetadata,
        },
      } as any;
    },
  };

  const runtime = useLocalRuntime(modelAdapter);

  return (
    <AssistantRuntimeProvider runtime={runtime}>
      {children}
    </AssistantRuntimeProvider>
  );
}