import { parse as chronoparse } from 'chrono-node'
import { zodResponseFormat } from 'openai/helpers/zod'
import { z } from 'zod'
import { hashFile } from './hashAuthentication'
import { openaiBeta } from './llmClients'
import { regexes } from './regexes'
import { uploadToS3 } from './s3Storage'
import { ChatMessage } from './types'
/** Shape of one message entry as consumed by `decodeInsta`. */
type RawInstaMessage = {
  // Sender display name; run through `decodeUnicode` by `decodeInsta`.
  sender_name: string
  // Optional body; `decodeInsta` substitutes an empty string when it is missing.
  content?: string
  // Passed directly to `new Date(...)` by `decodeInsta`.
  timestamp_ms: number
}

/** Shape of one message entry as consumed by `decodeHangouts`. */
type RawHangoutsMessage = {
  // Author of the message; only `name` is read by `decodeHangouts`.
  creator: {
    name: string
  }
  // Date as free-form text; `decodeHangouts` parses it with chrono-node.
  created_date: string
  // Message body; falsy values are treated as an empty message by `decodeHangouts`.
  text: string
}

// Schema for the structured answer requested from the LLM in
// `getParsingInfoFromLLM`: two participant names plus a regex pattern string.
const RegexResponse = z.object({
  user1: z.string(),
  user2: z.string(),
  regex_pattern: z.string(),
})

// Static TypeScript type inferred from the `RegexResponse` schema.
type RegexResponseType = z.infer<typeof RegexResponse>

// Helper functions
/**
 * Expand literal `\uXXXX` escape sequences (a backslash, `u`, and exactly four
 * hex digits) into the UTF-16 code units they denote.
 *
 * @param str Value to decode. A non-string value is reported with a console
 *   warning and returned converted via `String(...)`.
 * @returns The text with every well-formed escape sequence replaced.
 */
function decodeUnicode(str: any): string {
  if (typeof str === 'string') {
    const escapeSequence = /\\u([0-9a-fA-F]{4})/g
    return str.replace(escapeSequence, (_whole, hexDigits) => {
      const codeUnit = parseInt(hexDigits, 16)
      return String.fromCharCode(codeUnit)
    })
  }

  console.warn('decodeUnicode received non-string input:', str)
  return String(str)
}

/**
 * Normalise a single message body.
 *
 * Steps, in order: expand literal `\uXXXX` escapes (via `decodeUnicode`),
 * apply NFKC normalisation, strip URL-like tokens, strip invisible
 * formatting characters, collapse whitespace runs to one space, and trim.
 *
 * @param text Message body. A non-string value is reported with a console
 *   warning and returned converted via `String(...)`, without cleaning.
 * @returns The cleaned text.
 */
function clean(text: any): string {
  if (typeof text !== 'string') {
    console.warn('clean received non-string input:', text)
    return String(text)
  }

  return (
    decodeUnicode(text)
      .normalize('NFKC')
      // Remove URLs
      .replace(/http\S+|www\S+|https\S+/g, '')
      // Remove zero-width and invisible characters. U+200D (ZERO WIDTH JOINER)
      // is deliberately NOT in this class: it glues multi-part emoji together,
      // and stripping it would split one emoji into its separate components.
      .replace(/[\u200B\u200C\u200E\u200F\uFEFF\u2028\u2029]/g, '')
      // Replace runs of whitespace with a single space
      .replace(/\s+/g, ' ')
      .trim()
  )
}

/**
 * Ask the LLM to name the two main participants of a transcript sample and to
 * propose a regex, with named groups `timestamp`, `user` and `message`, for
 * parsing the transcript line by line.
 *
 * @param contentSample Transcript excerpt interpolated verbatim into the prompt.
 * @returns The structured answer after validation against `RegexResponse`.
 */
async function getParsingInfoFromLLM(
  contentSample: string
): Promise<RegexResponseType> {
  const prompt = `
    Analyze the following chat transcript sample and provide the following information:
    1. The names of the two main users in the conversation.
    2. A JavaScript regex pattern to extract the timestamp, user, and message content.
    Chat transcript sample:
    ${contentSample}
    Provide your answer in the following JSON format:
    {
        "user1": "Name1",
        "user2": "Name2",
        "regex_pattern": "your_regex_pattern_here"
    }
    
    Instructions for the regex pattern:
    - Use named groups for 'timestamp', 'user', and 'message'.
    - The pattern should match the entire line, including the timestamp and user name.
    - Ensure the pattern accounts for variations in time format (e.g., "7:07 p.m." or "19:07").
    - Same goes for date format (for example, months/days could be 1 or 2 digits). Better to be too flexible than too strict.
    - The 'message' group should capture the entire message, including any punctuation or special characters.
    - Do not include the 'r' prefix in the regex pattern string.
    Example regex pattern (adjust as needed):
    "(?<timestamp>\\d{4}-\\d{2}-\\d{2},\\s\\d{1,2}:\\d{2}\\s(?:AM|PM))\\s-\\s(?<user>[^:]+):\\s(?<message>.*)"
  `

  // Request a structured response shaped by the `RegexResponse` schema;
  // temperature is pinned to 0.
  const response = await openaiBeta({
    model: 'gpt-4o',
    messages: [
      {
        role: 'system',
        content: 'You are a helpful assistant that analyzes chat transcripts.',
      },
      { role: 'user', content: prompt },
    ],
    response_format: zodResponseFormat(RegexResponse, 'regexResponse'),
    temperature: 0,
  })

  // Validate the first choice's parsed payload against the schema.
  const message = RegexResponse.parse(response.choices[0]?.message?.parsed)

  // NOTE(review): zod's `parse` is documented to throw on invalid input, which
  // would make the final `throw` below unreachable — confirm.
  if (message) {
    return message
  }
  throw new Error('Unexpected response from OpenAI API')
}

/**
 * Convert raw Instagram-style entries into normalised messages ordered by
 * ascending date.
 *
 * `index` records each entry's position in the input array, i.e. the order
 * before sorting.
 *
 * @param messages Raw entries to convert.
 * @returns A new array of `{ index, user, message, date }` objects.
 */
const decodeInsta = (messages: RawInstaMessage[]) => {
  const decoded = messages.map((raw: RawInstaMessage, position: number) => {
    const user = decodeUnicode(raw.sender_name)
    const message = clean(raw.content || '')
    const date = new Date(raw.timestamp_ms)
    return { index: position, user, message, date }
  })

  decoded.sort((earlier, later) => earlier.date.getTime() - later.date.getTime())
  return decoded
}

/**
 * Convert raw Hangouts-style entries into normalised messages ordered by
 * ascending date.
 *
 * Entries whose cleaned text is empty, or whose `created_date` yields no
 * chrono-node result, are dropped. `index` records each entry's position in
 * the input array (before filtering and sorting).
 *
 * @param messages Raw entries to convert.
 * @returns A new array of `{ index, user, message, date }` objects.
 */
const decodeHangouts = (messages: RawHangoutsMessage[]) => {
  const decoded = messages.map((raw: RawHangoutsMessage, position: number) => {
    const user = decodeUnicode(raw.creator.name)
    const message = raw.text ? clean(raw.text) : ''
    // Keep only the first date chrono-node recognises in the string.
    const [date] = chronoparse(raw.created_date).map((item) => item.start.date())
    return { index: position, user, message, date }
  })

  const usable = decoded.filter((entry) => entry.message !== '' && entry.date)
  usable.sort((earlier, later) => earlier.date.getTime() - later.date.getTime())
  return usable
}

/**
 * Convert a Telegram-style export object into normalised messages ordered by
 * ascending date.
 *
 * `index` records each entry's position in `messages.messages` (before
 * sorting).
 *
 * @param messages Export object; its `messages` array is read.
 * @returns An array of `{ index, user, message, date }` objects.
 */
const decodeTelegram = (messages: any) => {
  // A `text` value may be an array mixing plain strings with entity objects
  // that carry their own `text`. Passing such an array to `clean` would
  // stringify it as "[object Object],...", so flatten it to plain text first.
  // Non-array values are returned untouched to keep the original behaviour.
  const flattenText = (text: any): any => {
    if (!Array.isArray(text)) {
      return text
    }
    return text
      .map((part: any) => {
        if (typeof part === 'string') {
          return part
        }
        return typeof part?.text === 'string' ? part.text : ''
      })
      .join('')
  }

  return messages.messages
    .map((msg: any, index: number) => ({
      index: index,
      user: decodeUnicode(msg.from),
      message: clean(flattenText(msg.text) || ''),
      date: new Date(msg.date),
    }))
    .sort((a: ChatMessage, b: ChatMessage) => a.date.getTime() - b.date.getTime())
}

/**
 * Detect which chat export format a parsed JSON payload uses and dispatch to
 * the matching decoder.
 *
 * Detection inspects only the first element of `messages.messages`:
 * `sender_name` selects Insta, `creator` selects Hangouts, and `from` together
 * with `date` selects Telegram.
 *
 * @param messages Parsed JSON payload.
 * @returns The decoded messages from the selected decoder.
 * @throws Error('Unknown chat format') when the payload has no non-empty
 *   `messages` array or the first entry matches none of the known formats.
 */
const decodeJSON = (messages: any) => {
  // Guard the shape up front so that an empty or missing `messages` array
  // reports the intended error instead of an unrelated TypeError.
  const first = Array.isArray(messages?.messages)
    ? messages.messages[0]
    : undefined
  if (first == null) {
    throw new Error('Unknown chat format')
  }

  // check if messages is in insta, hangouts, or telegram format
  if (first.sender_name) {
    console.log('Decoding Insta')
    return decodeInsta(messages.messages)
  } else if (first.creator) {
    console.log('Decoding Hangouts')
    return decodeHangouts(messages.messages)
  } else if (first.from && first.date) {
    console.log('Decoding Telegram')
    return decodeTelegram(messages)
  } else {
    throw new Error('Unknown chat format')
  }
}

/**
 * Parse a raw chat export into messages sorted by ascending date.
 *
 * The text is first tried as JSON and handed to `decodeJSON`. If anything in
 * that path throws, a regex fallback runs instead: the first entry of
 * `regexes` that yields more than 20 matches is used; if none does, a pattern
 * is requested from the LLM using the first 3000 characters as the sample.
 * The chosen pattern must expose the named groups `timestamp`, `user` and
 * `message`. Matches whose timestamp cannot be turned into a valid date are
 * logged and skipped.
 *
 * Side effects: logs progress to the console. When the fallback produces
 * fewer than 5 messages, the text is hashed and handed to `uploadToS3`;
 * otherwise the pattern is added to the shared `regexes` list if it is not
 * already there.
 *
 * @param chatText Raw contents of the chat export.
 * @returns The parsed messages, sorted by date.
 */
async function parse(chatText: string): Promise<ChatMessage[]> {
  // Standardize line endings to Unix style
  const text = chatText.replace(/\r\n/g, '\n')

  try {
    const chatData = JSON.parse(text)
    console.log('Parsed chat data:', chatData)
    const parsedMessages = decodeJSON(chatData)
    console.log('First 10 messages with indexes:', parsedMessages.slice(0, 10))
    outputUserStats(parsedMessages)

    // Log the first 20 messages
    console.log('First 20 parsed messages:', parsedMessages.slice(0, 20))

    return parsedMessages
  } catch (error) {
    console.error('Error parsing JSON:', error)
    // Log the first 1000 characters of the text
    console.error('Problematic chatText:', text.slice(0, 1000) + '...')
  }

  // Fallback to regex parsing if JSON parsing fails
  let pattern = null

  for (const regex of regexes) {
    const matchCount = Array.from(text.matchAll(new RegExp(regex, 'gm'))).length
    if (matchCount > 20) {
      pattern = regex
      break
    }
  }

  if (!pattern) {
    console.log(
      'No matching regex found with more than 20 matches, getting pattern from LLM'
    )
    const llmResponse = await getParsingInfoFromLLM(text.slice(0, 3000))
    pattern = llmResponse.regex_pattern
  }

  console.log('Using pattern: ', pattern)

  const matches = Array.from(text.matchAll(new RegExp(pattern, 'gm')))
  const parsedData: ChatMessage[] = []
  // End offset of the last match that was successfully converted.
  let lastIndex = 0

  for (const [index, match] of matches.entries()) {
    if (!match.groups) {
      continue
    }
    const { timestamp, user, message } = match.groups
    try {
      // An all-digit timestamp is treated as an epoch value for `Date`;
      // anything else is handed to chrono-node and its first result is used.
      const parsedDate = /^\d+$/.test(timestamp)
        ? new Date(Number(timestamp))
        : chronoparse(timestamp).map((item) => item.start.date())[0]

      // `Invalid Date` is truthy, so check the numeric value as well:
      // a NaN time would otherwise corrupt the sort below.
      if (!parsedDate || Number.isNaN(parsedDate.getTime())) {
        throw new Error('Invalid date')
      }

      parsedData.push({
        index: index,
        user: decodeUnicode(user),
        message: clean(message),
        date: parsedDate,
      })

      lastIndex = (match.index ?? 0) + match[0].length
    } catch (error) {
      console.error('Error parsing timestamp: ', timestamp, error)
    }
  }

  console.log('Parsed data:', parsedData)

  // Add any remaining text after the last match to the last message
  if (parsedData.length > 0 && lastIndex < text.length) {
    const remainingText = text.slice(lastIndex).trim()
    if (remainingText) {
      parsedData[parsedData.length - 1].message += '\n' + clean(remainingText)
    }
  }

  if (parsedData.length < 5) {
    // Too few messages: hand the text to `uploadToS3`. The upload is not
    // awaited, so attach a rejection handler rather than leaving a floating
    // promise.
    const file = new File([text], 'chat.txt')
    const hashString = await hashFile(file)
    void Promise.resolve(uploadToS3(file, hashString, true)).catch(
      (uploadError) => {
        console.error('Error uploading unparsed chat to S3:', uploadError)
      }
    )
  } else if (!regexes.includes(pattern)) {
    // Remember a pattern that worked, but only once: the pattern may have
    // come from `regexes` itself, and re-pushing it on every call would grow
    // the shared list with duplicates.
    regexes.push(pattern)
  }

  const sortedData = parsedData.sort((a, b) => a.date.getTime() - b.date.getTime())

  outputUserStats(sortedData)

  // Log the first 20 messages
  console.log('First 20 parsed messages:', sortedData.slice(0, 20))

  return sortedData
}

/**
 * Log how many distinct users appear in `messages` and how many messages each
 * of them sent.
 *
 * Counts are kept in a `Map` rather than a plain object so that user names
 * which collide with `Object.prototype` members (e.g. "constructor",
 * "__proto__") are tallied correctly. Users are listed in order of first
 * appearance.
 *
 * @param messages Messages to summarise; only `user` is read.
 */
function outputUserStats(messages: ChatMessage[]) {
  const userStats = new Map<string, number>()

  for (const message of messages) {
    userStats.set(message.user, (userStats.get(message.user) ?? 0) + 1)
  }

  console.log(`Number of people identified: ${userStats.size}`)
  console.log('Message count per user:')
  for (const [user, count] of userStats) {
    console.log(`${user}: ${count} messages`)
  }
}

/**
 * Reduce a string to its ASCII-only form.
 *
 * The input is NFKD-normalised (so accented and compatibility characters
 * decompose into base characters plus marks), every code unit outside
 * U+0000–U+007F is removed, and surrounding whitespace is trimmed.
 *
 * @param str Text to reduce.
 * @returns The ASCII-only, trimmed result.
 */
export function sanitizeFilename(str: string): string {
  const decomposed = str.normalize('NFKD')
  const asciiOnly = decomposed.replace(/[^\x00-\x7F]/g, '')
  return asciiOnly.trim()
}

export { parse }
