Pre-Final Backup

This commit is contained in:
2026-05-03 13:26:31 +01:00
parent cd11e76c07
commit 165af509ef
19 changed files with 2829 additions and 1223 deletions

View File

@@ -20,14 +20,18 @@ import { keyManager } from "@/lib/services/ai/key-manager";
const PRIMARY_ANALYSIS_MODEL =
process.env.AI_MODEL_PRIMARY || "gemini-3.1-flash-lite-preview";
const GEMINI_SECONDARY_ANALYSIS_MODEL =
process.env.AI_MODEL_SECONDARY_GEMINI || "";
process.env.AI_MODEL_SECONDARY_GEMINI || process.env.AI_MODEL_SECONDARY || "";
const FALLBACK_ANALYSIS_MODEL =
process.env.AI_MODEL_FALLBACK || "llama-3.3-70b-versatile";
process.env.AI_MODEL_FALLBACK || "mistral-large-latest";
const FALLBACK_REPAIR_MODEL =
process.env.AI_MODEL_FALLBACK_REPAIR || "llama-3.3-70b-versatile";
const GROQ_API_KEY =
process.env.GROQ_API_KEY?.trim() || process.env.AI_GROQ_API_KEY?.trim() || "";
const GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions";
process.env.AI_MODEL_FALLBACK_REPAIR || "mistral-large-latest";
const MISTRAL_API_KEY = process.env.MISTRAL_API_KEY?.trim() || "";
const MISTRAL_API_URL = "https://api.mistral.ai/v1/chat/completions";
const MISTRAL_OCR_API_URL = "https://api.mistral.ai/v1/ocr";
const MISTRAL_VISION_MODEL =
process.env.AI_MODEL_MISTRAL_VISION || "pixtral-large-latest";
const MISTRAL_OCR_MODEL =
process.env.AI_MODEL_MISTRAL_OCR || "mistral-ocr-latest";
const GEMINI_ANALYSIS_MODELS = Array.from(
new Set(
@@ -36,7 +40,7 @@ const GEMINI_ANALYSIS_MODELS = Array.from(
);
const ANALYSIS_MODELS = Array.from(
new Set([...GEMINI_ANALYSIS_MODELS, `groq:${FALLBACK_ANALYSIS_MODEL}`]),
new Set([...GEMINI_ANALYSIS_MODELS, `mistral:${FALLBACK_ANALYSIS_MODEL}`]),
);
const FORCE_FALLBACK_TEST =
@@ -89,7 +93,7 @@ const isAdaptiveKeyPoints = (
};
export class AIService {
private static isTransientGeminiError(message: string): boolean {
private static isTransientAIError(message: string): boolean {
const normalized = message.toLowerCase();
return (
normalized.includes("503") ||
@@ -282,7 +286,7 @@ export class AIService {
// Better error messages
if (errorMessage.includes("API key")) {
throw new Error(
"Invalid or missing AI API key. Check AI_API_KEY1/2/3 for Gemini and GROQ_API_KEY for Groq fallback.",
"Invalid or missing AI API key. Check AI_API_KEY1/2/3 for Gemini and MISTRAL_API_KEY for Mistral fallback.",
);
} else if (errorMessage.includes("INVALID_CONTRACT:")) {
const reason = String(errorMessage)
@@ -291,9 +295,9 @@ export class AIService {
throw new Error(
reason || "Uploaded file is not recognized as a valid contract.",
);
} else if (this.isTransientGeminiError(errorMessage)) {
} else if (this.isTransientAIError(errorMessage)) {
throw new Error(
`Gemini is temporarily overloaded for the configured analysis models (${ANALYSIS_MODELS.join(", ")}). The app retried automatically, but both models are still busy. Please try again in a few minutes.`,
`The AI providers (Gemini/Mistral) are temporarily overloaded for the configured analysis models (${ANALYSIS_MODELS.join(", ")}). The app retried automatically, but both providers are still busy. Please try again in a few minutes.`,
);
} else if (
errorMessage.includes("not found") ||
@@ -337,7 +341,7 @@ export class AIService {
}
} else if (errorMessage.includes("quota")) {
throw new Error(
"Limit exceeded. Gemini or Groq quota may be exhausted. Check your provider dashboards for usage and limits.",
"Limit exceeded. Gemini or Mistral quota may be exhausted. Check your provider dashboards for usage and limits.",
);
} else {
throw new Error(`Error analyzing contract: ${errorMessage}`);
@@ -389,11 +393,11 @@ export class AIService {
return parseAiJsonResponse(text);
}
private static isGroqConfigured(): boolean {
return GROQ_API_KEY.length > 0;
private static isMistralConfigured(): boolean {
return MISTRAL_API_KEY.length > 0;
}
private static async generateWithGroq(input: {
private static async generateWithMistral(input: {
model?: string;
prompt: string;
systemPrompt?: string;
@@ -402,9 +406,9 @@ export class AIService {
temperature?: number;
topP?: number;
}): Promise<string> {
if (!this.isGroqConfigured()) {
if (!this.isMistralConfigured()) {
throw new Error(
"Groq fallback is not configured. Set GROQ_API_KEY (or AI_GROQ_API_KEY).",
"Mistral fallback is not configured. Set MISTRAL_API_KEY.",
);
}
@@ -418,23 +422,25 @@ export class AIService {
messages.push({ role: "user", content: input.prompt });
// Use json_object mode (compatible with all models)
const responseFormat: Record<string, unknown> | undefined = input.responseAsJson
? { type: "json_object" as const }
: undefined;
const responseFormat: Record<string, unknown> | undefined =
input.responseAsJson ? { type: "json_object" as const } : undefined;
const temperature = input.temperature ?? 0;
const top_p = temperature === 0 ? 1 : (input.topP ?? 0.95);
const body: Record<string, unknown> = {
model: modelName,
temperature: input.temperature ?? 0,
top_p: input.topP ?? 0.95,
temperature,
top_p,
max_tokens: input.maxOutputTokens,
response_format: responseFormat,
messages,
};
const response = await fetch(GROQ_API_URL, {
const response = await fetch(MISTRAL_API_URL, {
method: "POST",
headers: {
Authorization: `Bearer ${GROQ_API_KEY}`,
Authorization: `Bearer ${MISTRAL_API_KEY}`,
"Content-Type": "application/json",
},
body: JSON.stringify(body),
@@ -443,7 +449,7 @@ export class AIService {
if (!response.ok) {
const details = await response.text();
throw new Error(
`Groq API error ${response.status}: ${details.slice(0, 300)}`,
`Mistral API error ${response.status}: ${details.slice(0, 300)}`,
);
}
@@ -453,13 +459,92 @@ export class AIService {
const text = json.choices?.[0]?.message?.content?.trim() || "";
if (!text) {
throw new Error("Empty response from Groq fallback model.");
throw new Error("Empty response from Mistral fallback model.");
}
return text;
}
private static async generateWithGroqModelChain(input: {
/**
 * Runs a single multimodal (text + image) chat completion against the
 * Mistral chat endpoint using MISTRAL_VISION_MODEL.
 *
 * The image is embedded in the user message as a base64 data URL, so the
 * caller does not need to extract text from it beforehand.
 *
 * @param input.prompt          Text instruction sent alongside the image.
 * @param input.base64          Base64-encoded image payload.
 * @param input.mimeType        MIME type used to build the data URL.
 * @param input.systemPrompt    Optional system message placed before the user turn.
 * @param input.responseAsJson  When truthy, requests `json_object` response format.
 * @param input.maxOutputTokens Token cap for the completion (defaults to 16384).
 * @returns The trimmed text content of the first returned choice.
 * @throws Error if Mistral is not configured, the API answers with a non-OK
 *         status, or the first choice has no text content.
 */
private static async generateWithMistralVision(input: {
  prompt: string;
  base64: string;
  mimeType: string;
  systemPrompt?: string;
  responseAsJson?: boolean;
  maxOutputTokens?: number;
}): Promise<string> {
  if (!this.isMistralConfigured()) {
    throw new Error(
      "Mistral fallback is not configured. Set MISTRAL_API_KEY.",
    );
  }

  // OpenAI-compatible multimodal user turn: one text part plus one image part.
  const imageDataUrl = `data:${input.mimeType};base64,${input.base64}`;
  const userTurn: { role: string; content: unknown } = {
    role: "user",
    content: [
      { type: "text", text: input.prompt },
      { type: "image_url", image_url: { url: imageDataUrl } },
    ],
  };
  const chatMessages: Array<{ role: string; content: unknown }> =
    input.systemPrompt
      ? [{ role: "system", content: input.systemPrompt }, userTurn]
      : [userTurn];

  // `response_format` stays undefined (and is dropped by JSON.stringify)
  // unless the caller asked for JSON output.
  const payload: Record<string, unknown> = {
    model: MISTRAL_VISION_MODEL,
    temperature: 0,
    top_p: 1,
    max_tokens: input.maxOutputTokens ?? 16384,
    response_format: input.responseAsJson
      ? { type: "json_object" as const }
      : undefined,
    messages: chatMessages,
  };

  const apiResponse = await fetch(MISTRAL_API_URL, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${MISTRAL_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(payload),
  });

  if (!apiResponse.ok) {
    // Only the first 300 characters of the error body are surfaced.
    const errorBody = await apiResponse.text();
    throw new Error(
      `Mistral Vision API error ${apiResponse.status}: ${errorBody.slice(0, 300)}`,
    );
  }

  const completion = (await apiResponse.json()) as {
    choices?: Array<{ message?: { content?: string | null } }>;
  };
  const answer = completion.choices?.[0]?.message?.content?.trim() || "";
  if (answer.length === 0) {
    throw new Error("Empty response from Mistral Pixtral vision model.");
  }

  console.log(`✅ Mistral Pixtral vision analysis succeeded`);
  return answer;
}
private static async generateWithMistralModelChain(input: {
preferredModel?: string;
prompt: string;
systemPrompt?: string;
@@ -473,9 +558,9 @@ export class AIService {
[
input.preferredModel,
FALLBACK_ANALYSIS_MODEL,
"llama-3.3-70b-versatile",
"qwen-2.5-32b",
"llama-3.1-8b-instant",
"mistral-large-latest",
"mistral-small-latest",
"open-mistral-nemo",
].filter(Boolean),
),
) as string[];
@@ -484,7 +569,7 @@ export class AIService {
for (const modelName of candidates) {
try {
const text = await this.generateWithGroq({
const text = await this.generateWithMistral({
model: modelName,
prompt: input.prompt,
systemPrompt: input.systemPrompt,
@@ -495,14 +580,14 @@ export class AIService {
});
if (modelName !== (input.preferredModel || FALLBACK_ANALYSIS_MODEL)) {
console.warn(
`Groq switched to fallback model ${modelName} after primary fallback model failed.`,
`Mistral switched to fallback model ${modelName} after primary fallback model failed.`,
);
}
return text;
} catch (error) {
lastError = error;
console.warn(
`Groq model ${modelName} failed. Trying next fallback model.`,
`Mistral model ${modelName} failed. Trying next fallback model.`,
error instanceof Error ? error.message : String(error),
);
}
@@ -510,36 +595,79 @@ export class AIService {
throw lastError instanceof Error
? lastError
: new Error("All Groq fallback models failed.");
: new Error("All Mistral fallback models failed.");
}
/**
* Build a Groq-optimized system prompt that mirrors the Gemini behavior.
* Build a Mistral-optimized system prompt that mirrors the Gemini behavior.
* This separates role & formatting rules from user content for better
* instruction adherence on open-source models.
*
* Unlike the Gemini prompt which sends examples with the file inline,
* this prompt is designed to prevent hallucination by using explicit
* placeholder markers instead of realistic example values.
*/
private static buildGroqSystemPrompt(): string {
private static buildMistralSystemPrompt(): string {
return `You are an expert contract analysis engine for the BFSI (Banking, Financial Services, and Insurance) sector.
You receive the full text content of a contract document below and must extract structured information from it.
You receive the full text content of a contract document and must extract structured information from it.
CRITICAL OUTPUT RULES:
ABSOLUTE RULES — VIOLATION OF THESE IS A CRITICAL FAILURE:
1. Return ONLY valid, parseable JSON — no markdown, no backticks, no explanations, no commentary.
2. Your JSON must conform EXACTLY to the schema specified in the user prompt.
3. Every required field MUST be present. Use null for missing strings/numbers and [] for missing arrays.
4. All dates MUST be in ISO YYYY-MM-DD format or null.
5. The "premium" field must be a positive number or null — NO currency symbols.
6. The "type" field MUST be one of: INSURANCE_AUTO, INSURANCE_HOME, INSURANCE_HEALTH, INSURANCE_LIFE, LOAN, CREDIT_CARD, INVESTMENT, OTHER.
7. Do NOT hallucinate or invent data that is not present in the document.
8. Preserve original language in extractedText and sourceSnippet fields (accents, special characters).
9. The "summary" must be 4-6 professional sentences covering parties, obligations, coverage, exclusions, and deadlines.
10. The "extractedText" must contain at least 30 characters of actual document content.
11. The "keyPoints.explainability" array must have at least 4 items for critical fields when data is available.
12. contractValidation.confidence must reflect actual extraction certainty (0-100).
13. When uncertain about a value, use null and set a lower confidence — never guess.
14. Parse localized number formats correctly (e.g., 1.234,56 vs 1,234.56).
15. Detect the contract language and set the "language" field accordingly (ISO 639-1).
2. EVERY value you output MUST come directly from the document text provided to you.
3. If a piece of information does NOT exist in the document text, you MUST use null (for strings/numbers) or [] (for arrays). NEVER invent, assume, or guess data.
4. Do NOT copy example values from the schema description — they are placeholders, not real data.
5. The "extractedText" field MUST contain actual verbatim text from the document — not a summary, not examples.
You are replacing a more capable multimodal model (Gemini) as a fallback. Your output quality MUST match production standards.`;
JSON SCHEMA (use exact field names):
{
"language": "<ISO 639-1 code detected from document>",
"title": "<exact contract title from document or null>",
"type": "<one of: INSURANCE_AUTO, INSURANCE_HOME, INSURANCE_HEALTH, INSURANCE_LIFE, LOAN, CREDIT_CARD, INVESTMENT, OTHER>",
"provider": "<company/institution name from document or null>",
"policyNumber": "<policy/contract number from document or null>",
"startDate": "<YYYY-MM-DD from document or null>",
"endDate": "<YYYY-MM-DD from document or null>",
"premium": <number from document or null — NO currency symbols>,
"premiumCurrency": "<currency code from document or null>",
"summary": "<4-6 sentences summarizing the actual contract content>",
"keyPoints": {
"guarantees": ["<actual guarantee from document>"],
"exclusions": ["<actual exclusion from document>"],
"franchise": "<deductible/penalty from document or null>",
"importantDates": ["<actual date from document with description>"],
"explainability": [
{
"field": "<field name>",
"why": "<why this value was extracted>",
"sourceSnippet": "<verbatim quote from document>",
"sourceHints": { "page": "<page or null>", "section": "<section or null>", "confidence": <0-100> }
}
]
},
"keyPeople": [{"name": "<from document>", "role": "<from document or null>", "email": "<from document or null>", "phone": "<from document or null>"}],
"contactInfo": {"name": "<from document or null>", "email": null, "phone": null, "address": null, "role": null},
"importantContacts": [],
"relevantDates": [{"date": "<YYYY-MM-DD>", "description": "<from document>", "type": "<EXPIRATION|RENEWAL|PAYMENT|REVIEW|OTHER>"}],
"extractedText": "<verbatim text from the document, max 12000 chars>",
"contractValidation": {
"isValidContract": true,
"confidence": <0-100 reflecting how much data you actually found>,
"reason": null
}
}
FIELD RULES:
- All dates: ISO YYYY-MM-DD or null
- premium: positive number or null — NO currency symbols, NO text
- type: must be exactly one of the 8 values listed
- summary: 4-6 professional sentences about THIS specific contract. If no contract text is found, output "No contract data found in the document text."
- extractedText: must contain at least 30 characters of ACTUAL document content. If no text is found, output "No document text could be extracted. Please ensure the document is not a scanned image."
- explainability: at least 4 items with real sourceSnippets from the document
- confidence: reflects how much data you actually found (not how confident the model is)
- Parse localized number formats correctly (1.234,56 vs 1,234.56)
- Detect the contract language and set "language" accordingly
You are replacing a more capable multimodal model (Gemini) as a fallback. Your output quality MUST match production standards. ACCURACY is more important than completeness — it is better to return null than to guess.`;
}
private static async generateAnalysisWithFallback(input: {
@@ -551,27 +679,52 @@ You are replacing a more capable multimodal model (Gemini) as a fallback. Your o
let lastError: unknown = null;
const forceFallback = Boolean(input.forceFallbackModelTest);
const buildGroundedGroqPrompt = async (basePrompt: string) => {
const groundingText = await this.extractGroqGroundingText({
const buildGroundedMistralPrompt = async () => {
const groundingText = await this.extractMistralGroundingText({
base64: input.base64,
mimeType: input.mimeType,
});
if (!groundingText) {
return `${basePrompt}\n\nGROQ FALLBACK RULES:\n- You do not have direct binary file access in this fallback path.\n- Do not hallucinate values; use null/empty arrays when data is missing.\n- Keep contractValidation conservative when uncertain.\n- Set contractValidation.confidence to at most 60 when no grounding text is available.`;
throw new Error(
"INVALID_CONTRACT:No extractable text found in this PDF after OCR fallback. Please verify the file is readable and not password-protected.",
);
}
return `${basePrompt}\n\n--- BEGIN GROUNDED DOCUMENT TEXT (AUTHORITATIVE SOURCE) ---\n${groundingText}\n--- END GROUNDED DOCUMENT TEXT ---\n\nGROQ FALLBACK RULES:\n- Extract fields ONLY from the grounded document text above. This text is the full contract content.\n- Do not invent, assume, or hallucinate any values not explicitly present in the above text.\n- If a field's data is not found in the text, use null (for strings/numbers) or [] (for arrays).\n- Dates: convert any date format found in the text to YYYY-MM-DD.\n- Numbers: parse localized formats (comma vs period) correctly before setting numeric fields.\n- contractValidation.confidence should reflect how much data you could extract from the text.`;
return `--- BEGIN GROUNDED DOCUMENT TEXT (AUTHORITATIVE SOURCE) ---
${groundingText}
--- END GROUNDED DOCUMENT TEXT ---
MISTRAL FALLBACK RULES:
- Extract fields ONLY from the grounded document text above. This text is the full contract content.
- Do not invent, assume, or hallucinate any values not explicitly present in the above text.
- If a field's data is not found in the text, use null (for strings/numbers) or [] (for arrays).
- Dates: convert any date format found in the text to YYYY-MM-DD.
- Numbers: parse localized formats (comma vs period) correctly before setting numeric fields.
- contractValidation.confidence should reflect how much data you could extract from the text.`;
};
if (forceFallback) {
console.warn(
`🧪 Fallback test mode enabled. Skipping Gemini and forcing Groq model ${FALLBACK_ANALYSIS_MODEL}.`,
`🧪 Fallback test mode enabled. Skipping Gemini and forcing Mistral model ${FALLBACK_ANALYSIS_MODEL}.`,
);
const groundedPrompt = await buildGroundedGroqPrompt(input.prompt);
return this.generateWithGroqModelChain({
// For images: use Pixtral vision model directly (multimodal — no OCR bridge needed)
if (input.mimeType.startsWith("image/") && this.isMistralConfigured()) {
return this.generateWithMistralVision({
systemPrompt: this.buildMistralSystemPrompt(),
prompt: `TEST MODE: You are the forced fallback model. Return ONLY valid JSON and preserve the required schema exactly. Extract information from the provided image.`,
base64: input.base64,
mimeType: input.mimeType,
responseAsJson: true,
maxOutputTokens: 16384,
});
}
const groundedPrompt = await buildGroundedMistralPrompt();
return this.generateWithMistralModelChain({
preferredModel: FALLBACK_ANALYSIS_MODEL,
systemPrompt: this.buildGroqSystemPrompt(),
systemPrompt: this.buildMistralSystemPrompt(),
prompt: `${groundedPrompt}\n\nTEST MODE: You are the forced fallback model. Return ONLY valid JSON and preserve the required schema exactly.`,
responseAsJson: true,
maxOutputTokens: 8192,
@@ -610,7 +763,6 @@ You are replacing a more capable multimodal model (Gemini) as a fallback. Your o
throw new Error("Empty response");
});
} catch (error: any) {
if (error.message?.includes("CRITICAL_KEY_EXHAUSTION")) throw error;
lastError = error;
console.warn(
`Analysis with model ${modelName} failed. Trying next model.`,
@@ -654,35 +806,54 @@ You are replacing a more capable multimodal model (Gemini) as a fallback. Your o
throw new Error("Empty response from fallback");
});
} catch (error: any) {
if (error.message?.includes("CRITICAL_KEY_EXHAUSTION")) throw error;
console.warn("Lenient generation also failed:", error);
}
// === Groq fallback path ===
// === Mistral AI fallback path ===
console.warn(
"All Gemini models exhausted. Activating Groq fallback pipeline...",
"All Gemini models exhausted. Activating Mistral AI fallback pipeline...",
);
try {
const groundedPrompt = await buildGroundedGroqPrompt(input.prompt);
const groqText = await this.generateWithGroqModelChain({
// For images: use Pixtral vision model directly (multimodal — no OCR bridge needed)
if (input.mimeType.startsWith("image/") && this.isMistralConfigured()) {
const mistralText = await this.generateWithMistralVision({
systemPrompt: this.buildMistralSystemPrompt(),
prompt: `IMPORTANT: Return ONLY valid JSON and preserve the required schema exactly. Do not add any text outside of the JSON object. Extract data from the provided image.`,
base64: input.base64,
mimeType: input.mimeType,
responseAsJson: true,
maxOutputTokens: 16384,
});
console.log(
`✅ Analysis fallback with Mistral Pixtral vision succeeded`,
);
return mistralText;
}
// For PDFs/text: extract text and use text-only Mistral
const groundedPrompt = await buildGroundedMistralPrompt();
const mistralText = await this.generateWithMistralModelChain({
preferredModel: FALLBACK_ANALYSIS_MODEL,
systemPrompt: this.buildGroqSystemPrompt(),
systemPrompt: this.buildMistralSystemPrompt(),
prompt: `${groundedPrompt}\n\nIMPORTANT: Return ONLY valid JSON and preserve the required schema exactly. Do not add any text outside of the JSON object.`,
responseAsJson: true,
maxOutputTokens: 8192,
});
console.log(
`✅ Analysis fallback with Groq model ${FALLBACK_ANALYSIS_MODEL} succeeded`,
`✅ Analysis fallback with Mistral model ${FALLBACK_ANALYSIS_MODEL} succeeded`,
);
return mistralText;
} catch (mistralError) {
console.warn("Mistral analysis fallback failed:", mistralError);
lastError = new Error(
`Mistral fallback also failed: ${mistralError instanceof Error ? mistralError.message : String(mistralError)}. Original error: ${lastError instanceof Error ? lastError.message : String(lastError)}`,
);
return groqText;
} catch (groqError) {
console.warn("Groq analysis fallback failed:", groqError);
}
throw lastError instanceof Error
? lastError
: new Error(
"All analysis models (Gemini + Groq fallback) failed to generate content.",
"All analysis models (Gemini + Mistral fallback) failed to generate content.",
);
}
@@ -746,7 +917,7 @@ Original parse error: ${parseError}
Malformed response to fix:
${malformedResponse.slice(0, 14000)}`;
const repairedText = await this.generateWithGroqModelChain({
const repairedText = await this.generateWithMistralModelChain({
preferredModel: FALLBACK_REPAIR_MODEL,
prompt: repairPrompt,
responseAsJson: true,
@@ -766,7 +937,7 @@ ${malformedResponse.slice(0, 14000)}`;
} catch (firstRepairParseError) {
const secondPassPrompt = `${repairPrompt}\n\nSECOND PASS CORRECTION:\nYour previous repaired JSON was still invalid.\nReason: ${firstRepairParseError instanceof Error ? firstRepairParseError.message : "Invalid JSON"}.\nReturn ONLY strict valid JSON.`;
const secondPass = await this.generateWithGroqModelChain({
const secondPass = await this.generateWithMistralModelChain({
preferredModel: FALLBACK_REPAIR_MODEL,
prompt: secondPassPrompt,
responseAsJson: true,
@@ -785,7 +956,12 @@ ${malformedResponse.slice(0, 14000)}`;
}
}
private static async extractGroqGroundingText(input: {
/**
* Extract grounding text for Mistral text-only fallback.
* For PDFs: extracts text directly using pdf-parse (local, no AI needed).
* For images: returns empty string — Pixtral vision handles images directly.
*/
private static async extractMistralGroundingText(input: {
base64: string;
mimeType: string;
}): Promise<string> {
@@ -793,13 +969,43 @@ ${malformedResponse.slice(0, 14000)}`;
if (input.mimeType === "application/pdf") {
try {
const pdfBuffer = Buffer.from(input.base64, "base64");
const { PDFParse } = await import("pdf-parse");
const parser = new PDFParse({ data: pdfBuffer });
let parsed: { text?: string };
// Handle Next.js Webpack/Turbopack CJS/ESM interop
let pdfParseModule: any;
try {
parsed = await parser.getText();
} finally {
await parser.destroy();
pdfParseModule = require("pdf-parse");
} catch {
pdfParseModule = await import("pdf-parse");
}
const PDFParseClass =
pdfParseModule?.PDFParse ||
pdfParseModule?.default?.PDFParse ||
(typeof pdfParseModule === "function" ? pdfParseModule : null);
if (!PDFParseClass) {
throw new Error(
"Could not resolve PDFParse constructor from pdf-parse module.",
);
}
let parsed: { text?: string };
if (
typeof PDFParseClass === "function" &&
!PDFParseClass.prototype?.getText
) {
// Fallback if it's actually the legacy function export
parsed = await PDFParseClass(pdfBuffer);
} else {
const parser = new PDFParseClass({ data: pdfBuffer });
try {
parsed = await parser.getText();
} finally {
if (typeof parser.destroy === "function") {
await parser.destroy();
}
}
}
const text = (parsed?.text || "")
@@ -807,66 +1013,110 @@ ${malformedResponse.slice(0, 14000)}`;
.replace(/\n{3,}/g, "\n\n")
.trim();
if (text && text.length > 50) {
if (text && text.length >= 10) {
console.log(
`📄 Groq grounding: extracted ${text.length} chars from PDF`,
`📄 Mistral grounding: extracted ${text.length} chars from PDF`,
);
return text.slice(0, 50000);
}
console.warn(
`📄 Mistral grounding: native PDF text extraction too short (length: ${text?.length || 0}). Trying OCR fallback...`,
);
} catch (error) {
console.warn(
"PDF grounding extraction failed for Groq fallback.",
error,
"📄 PDF grounding extraction failed for Mistral fallback:",
error instanceof Error ? error.message : error,
);
}
// OCR fallback for scanned PDFs.
try {
const ocrText = await this.extractMistralPdfTextWithOcr(input.base64);
if (ocrText.length >= 10) {
console.log(
`📄 Mistral grounding OCR: extracted ${ocrText.length} chars from scanned PDF`,
);
return ocrText.slice(0, 50000);
}
} catch (ocrError) {
console.warn(
"📄 PDF OCR fallback failed for Mistral grounding:",
ocrError instanceof Error ? ocrError.message : ocrError,
);
}
}
// For images: try to extract text using Gemini OCR as grounding bridge.
// This gives Groq the text content it needs since it can't read images.
if (input.mimeType.startsWith("image/")) {
try {
const ocrText = await keyManager.execute(async (genAI) => {
const model = genAI.getGenerativeModel({
model: PRIMARY_ANALYSIS_MODEL,
generationConfig: {
temperature: 0,
maxOutputTokens: 8192,
},
});
const result = await model.generateContent([
"Extract ALL text from this document image exactly as it appears. Preserve structure, formatting, and all content. Return ONLY the raw text, no JSON, no commentary.",
{
inlineData: {
data: input.base64,
mimeType: input.mimeType,
},
},
]);
return result.response.text()?.trim() || "";
});
if (ocrText && ocrText.length > 50) {
console.log(
`🖼️ Groq grounding: extracted ${ocrText.length} chars from image via Gemini OCR bridge`,
);
return ocrText.slice(0, 50000);
}
} catch (error: any) {
// Gemini OCR bridge failed (likely key exhaustion), continue without
if (!error.message?.includes("CRITICAL_KEY_EXHAUSTION")) {
console.warn(
"Image grounding via Gemini OCR failed for Groq fallback; continuing without grounded text.",
error,
);
}
}
}
// For images: Pixtral vision model handles images directly via
// generateWithMistralVision, so no grounding text extraction is needed.
// The calling code in generateAnalysisWithFallback routes images
// to the vision path instead of the text-only grounded path.
return "";
}
/**
 * Posts a base64-encoded PDF to the Mistral OCR endpoint and returns the
 * recognized text as one normalized string.
 *
 * The response is read defensively: a top-level `text` field plus every entry
 * of `pages` and `output` (preferring `markdown`, then `text`, then `content`)
 * are concatenated with blank lines between them.
 *
 * @param pdfBase64 Base64 payload of the PDF (no data-URL prefix).
 * @returns The merged OCR text, or "" when Mistral is not configured.
 * @throws Error when the OCR endpoint answers with a non-OK status.
 */
private static async extractMistralPdfTextWithOcr(
  pdfBase64: string,
): Promise<string> {
  if (!this.isMistralConfigured()) return "";

  const ocrResponse = await fetch(MISTRAL_OCR_API_URL, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${MISTRAL_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: MISTRAL_OCR_MODEL,
      document: {
        type: "document_url",
        document_url: `data:application/pdf;base64,${pdfBase64}`,
      },
      include_image_base64: false,
    }),
  });

  if (!ocrResponse.ok) {
    // Only the first 300 characters of the error body are surfaced.
    const errorBody = await ocrResponse.text();
    throw new Error(
      `Mistral OCR API error ${ocrResponse.status}: ${errorBody.slice(0, 300)}`,
    );
  }

  type OcrChunk = { text?: string; markdown?: string; content?: string };
  const payload = (await ocrResponse.json()) as {
    text?: string;
    pages?: OcrChunk[];
    output?: OcrChunk[];
  };

  const pageChunks: OcrChunk[] = Array.isArray(payload.pages)
    ? payload.pages
    : [];
  const outputChunks: OcrChunk[] = Array.isArray(payload.output)
    ? payload.output
    : [];

  // The top-level text always leads, even when empty; blank chunks are dropped.
  const sections: string[] = [payload.text || ""];
  for (const chunk of pageChunks.concat(outputChunks)) {
    const chunkText = chunk.markdown || chunk.text || chunk.content || "";
    if (chunkText.trim().length > 0) {
      sections.push(chunkText);
    }
  }

  // Normalize carriage returns and collapse runs of 3+ newlines to one blank line.
  return sections
    .join("\n\n")
    .replace(/\r/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
/**
* Emergency fallback: Extract key contract fields from raw text when JSON is completely malformed.
* Builds a minimal but valid JSON structure from pattern-matched fields.
@@ -1406,7 +1656,7 @@ Include one short disclaimer only when legal context is discussed: "This is gene
if (!rawAnswer) {
try {
rawAnswer = await this.generateWithGroqModelChain({
rawAnswer = await this.generateWithMistralModelChain({
preferredModel: FALLBACK_ANALYSIS_MODEL,
systemPrompt: `You are a senior BFSI contract advisor. Answer questions about contracts accurately and professionally. Respond entirely in ${languageName}. Use plain text only — no markdown, no bold, no headers, no bullet points. Base your answers ONLY on the provided contract content. If information is missing, say so.`,
prompt,
@@ -1416,10 +1666,10 @@ Include one short disclaimer only when legal context is discussed: "This is gene
topP: 0.95,
});
console.log(
`✅ Q&A fallback with Groq model ${FALLBACK_ANALYSIS_MODEL} succeeded in ${languageName}`,
`✅ Q&A fallback with Mistral model ${FALLBACK_ANALYSIS_MODEL} succeeded in ${languageName}`,
);
} catch (groqError) {
lastError = groqError;
} catch (mistralError) {
lastError = mistralError;
}
}
@@ -1444,11 +1694,11 @@ Include one short disclaimer only when legal context is discussed: "This is gene
const errorMessage =
error instanceof Error ? error.message : String(error);
if (errorMessage.includes("API key")) {
throw new Error("Invalid or missing AI API key (Gemini/Groq).");
throw new Error("Invalid or missing AI API key (Gemini/Mistral).");
}
if (this.isTransientGeminiError(errorMessage)) {
if (this.isTransientAIError(errorMessage)) {
throw new Error(
`Gemini is temporarily overloaded for the configured Q&A models (${ANALYSIS_MODELS.join(", ")}). Please try again in a few minutes.`,
`The AI providers (Gemini/Mistral) are temporarily overloaded for the configured Q&A models (${ANALYSIS_MODELS.join(", ")}). Please try again in a few minutes.`,
);
}
throw new Error(`Error answering question: ${errorMessage}`);

View File

@@ -76,7 +76,12 @@ function toDateOrNull(value: unknown): string | null {
/**
 * Normalizes an arbitrary value into a list of at most 25 non-empty strings.
 *
 * - Anything that is not an array yields `[]`.
 * - Object items (e.g. `{ date, description }`) are flattened by joining their
 *   truthy property values with " - ". This must happen BEFORE any `String()`
 *   coercion, otherwise every object collapses to "[object Object]".
 * - All other items are coerced with `String()`; `null`/`undefined` become "".
 * - Each resulting entry is trimmed and empty entries are dropped.
 *
 * @param value Untrusted value, typically a field from parsed AI JSON output.
 * @returns Up to 25 trimmed, non-empty strings in their original order.
 */
function toStringList(value: unknown): string[] {
  if (!Array.isArray(value)) return [];
  return value
    .map((item: unknown): string => {
      if (typeof item === "object" && item !== null) {
        return Object.values(item)
          .filter(Boolean)
          .map((part) => String(part).trim())
          .filter((part) => part.length > 0)
          .join(" - ");
      }
      return String(item ?? "").trim();
    })
    .filter((item) => item.length > 0)
    .slice(0, 25);
}

View File

@@ -0,0 +1,280 @@
import nodemailer from "nodemailer";
/**
 * Summary fields of an analyzed contract, carried as
 * `ContractAnalysisEmailInput.blueprint`.
 *
 * Every field except `type` and `summary` is nullable.
 */
interface ContractBlueprint {
type: string;
provider: string | null;
policyNumber: string | null;
// NOTE(review): presumably date strings in a form `new Date()` can parse
// (see formatDateValue in this file) — confirm against the caller.
startDate: string | null;
endDate: string | null;
// NOTE(review): presumably rendered together through formatPremium — confirm.
premium: number | null;
premiumCurrency: string | null;
summary: string;
}
/**
 * Blockchain-related details that can accompany a contract analysis email via
 * the optional `ContractAnalysisEmailInput.blockchain` field.
 */
interface BlockchainEmailData {
documentHash: string;
txHash: string;
blockNumber: number;
blockTimestamp: Date;
network: string;
contractAddress: string;
// Nullable: an explorer link is not always present.
explorerUrl: string | null;
}
/**
 * Data bundle describing one contract-analysis notification email.
 *
 * NOTE(review): presumably the argument of
 * EmailService.sendContractAnalysisCompletedEmail — confirm against its
 * signature.
 */
interface ContractAnalysisEmailInput {
// Recipient address.
to: string;
userDisplayName?: string | null;
contractId: string;
contractFileName: string;
contractTitle: string;
// Extracted contract summary fields.
blueprint: ContractBlueprint;
// Optional; may be omitted or null.
blockchain?: BlockchainEmailData | null;
}
// Module-level cache: getTransporter() creates the transport once and reuses it.
let transporter: nodemailer.Transporter | null = null;
// Records which branch of getTransporter() built the cached transport.
let transportMode: "smtp" | "ethereal" | null = null;
// Guards warnMissingEmailConfigOnce() so the warning is logged a single time.
let hasWarnedMissingEmailConfig = false;
/**
 * Parses a boolean-like environment value.
 *
 * Surrounding whitespace is ignored and matching is case-insensitive, so
 * values such as " TRUE" or "1\n" (common in hand-edited .env files) are
 * recognized.
 *
 * @param value    Raw string, or undefined when the variable is unset.
 * @param fallback Result used when `value` is undefined, empty, or blank.
 * @returns `true` for "true" / "1", `false` for any other non-blank value,
 *          otherwise `fallback`.
 */
const asBoolean = (value: string | undefined, fallback: boolean): boolean => {
  const normalized = value?.trim().toLowerCase();
  // Unset, empty, and whitespace-only values all defer to the fallback.
  if (!normalized) return fallback;
  return normalized === "true" || normalized === "1";
};
/**
 * Reports whether the SMTP settings needed to build a real transport are
 * present: EMAIL_HOST, EMAIL_PORT, EMAIL_USER and EMAIL_PASS must all be
 * non-empty.
 */
const isEmailConfigured = (): boolean => {
  const requiredSettings = [
    process.env.EMAIL_HOST,
    process.env.EMAIL_PORT,
    process.env.EMAIL_USER,
    process.env.EMAIL_PASS,
  ];
  return requiredSettings.every((setting) => Boolean(setting));
};
/**
 * Logs the "email notifications are disabled" warning the first time it is
 * called; subsequent calls are no-ops.
 */
const warnMissingEmailConfigOnce = () => {
  if (!hasWarnedMissingEmailConfig) {
    // Flip the flag first so the warning is emitted once per process.
    hasWarnedMissingEmailConfig = true;
    console.warn(
      "Email notifications are disabled. Configure EMAIL_HOST, EMAIL_PORT, EMAIL_USER, EMAIL_PASS, and MAIL_FROM to enable contract summary emails.",
    );
  }
};
/**
 * Returns the mail transport, creating and caching it on first use.
 *
 * Resolution order:
 * 1. the cached transport, if one was already built;
 * 2. an SMTP transport from the EMAIL_* environment variables;
 * 3. outside production, a transport for a freshly created Ethereal test
 *    account;
 * 4. otherwise null, after logging the missing-configuration warning once.
 *
 * @returns The transport, or null when email cannot be sent.
 */
const getTransporter = async (): Promise<nodemailer.Transporter | null> => {
  if (transporter) {
    return transporter;
  }

  if (isEmailConfigured()) {
    const port = Number(process.env.EMAIL_PORT);
    transportMode = "smtp";
    transporter = nodemailer.createTransport({
      host: process.env.EMAIL_HOST,
      port,
      // Unless EMAIL_SECURE is set, TLS is implied only for port 465.
      secure: asBoolean(process.env.EMAIL_SECURE, port === 465),
      auth: {
        user: process.env.EMAIL_USER,
        pass: process.env.EMAIL_PASS,
      },
    });
    return transporter;
  }

  if (process.env.NODE_ENV === "production") {
    warnMissingEmailConfigOnce();
    return null;
  }

  // Development fallback: a throwaway Ethereal inbox.
  const { smtp, user, pass } = await nodemailer.createTestAccount();
  transportMode = "ethereal";
  transporter = nodemailer.createTransport({
    host: smtp.host,
    port: smtp.port,
    secure: smtp.secure,
    auth: { user, pass },
  });
  console.warn(
    "Email service is running in development fallback mode using Ethereal. Configure SMTP env vars for real inbox delivery.",
  );
  return transporter;
};
/**
 * Formats a premium amount for display.
 *
 * @param premium - Amount, or null when unknown.
 * @param currency - Currency symbol or code, or null/empty when unknown.
 * @returns "N/A" when there is no amount; otherwise the amount with two
 *   decimals in en-US grouping, prefixed by €, $ or £ when the currency is one
 *   of those symbols, or suffixed with the currency text otherwise.
 */
const formatPremium = (
  premium: number | null,
  currency: string | null,
): string => {
  if (premium === null || premium === undefined) {
    return "N/A";
  }

  const twoDecimals = new Intl.NumberFormat("en-US", {
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
  });
  const amount = twoDecimals.format(premium);

  if (!currency) {
    return amount;
  }

  const prefixSymbols = ["€", "$", "£"];
  return prefixSymbols.includes(currency)
    ? `${currency}${amount}`
    : `${amount} ${currency}`;
};
/**
 * Formats a date string as the date part of its ISO-8601 form.
 *
 * @param dateValue - Date string, or null/empty when unknown.
 * @returns "N/A" when there is no value, the input unchanged when it cannot
 *   be parsed as a date, otherwise the portion of toISOString() before "T".
 */
const formatDateValue = (dateValue: string | null): string => {
  if (!dateValue) {
    return "N/A";
  }

  const parsed = new Date(dateValue);
  const isValid = !Number.isNaN(parsed.getTime());
  return isValid ? parsed.toISOString().split("T")[0] : dateValue;
};
/**
 * Builds the in-app link for a contract.
 *
 * The base URL comes from NEXT_PUBLIC_APP_URL, falling back to APP_URL; both
 * are trimmed and any trailing slashes are removed before the path is added.
 *
 * @param contractId - Identifier placed in the `contract` query parameter;
 *   it is URL-encoded so reserved characters cannot alter the query string.
 * @returns The absolute link, or null when no base URL is configured.
 */
const formatContractLink = (contractId: string): string | null => {
  const baseUrl =
    process.env.NEXT_PUBLIC_APP_URL?.trim() || process.env.APP_URL?.trim();
  if (!baseUrl) return null;
  // NOTE(review): the path is "/contacts" although the link opens a contract;
  // confirm against the app's routes whether "/contracts" was intended.
  return `${baseUrl.replace(/\/+$/, "")}/contacts?contract=${encodeURIComponent(contractId)}`;
};
/**
 * Sends transactional emails about contract analysis results.
 */
export class EmailService {
  /**
   * Escapes the HTML-significant characters (&, <, >, ", ') so a value can be
   * placed in element content or a double-quoted attribute.
   */
  private static escapeHtml(value: unknown): string {
    return String(value ?? "")
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  }

  /**
   * Returns the URL escaped for use in an href, or null when it is missing,
   * unparsable, or not http(s) (e.g. a `javascript:` URL).
   */
  private static toSafeHref(url: string | null | undefined): string | null {
    if (!url) return null;
    try {
      const { protocol } = new URL(url);
      if (protocol !== "http:" && protocol !== "https:") return null;
    } catch {
      return null;
    }
    return EmailService.escapeHtml(url);
  }

  /**
   * Emails the contract blueprint, summary and blockchain proof to the user
   * once analysis has finished.
   *
   * Never throws: failures are logged and reported through the result.
   *
   * @param input - Recipient, contract identifiers, blueprint and optional
   *   blockchain proof.
   * @returns `success: true` (with an Ethereal `previewUrl` when available,
   *   otherwise null) after sending; `success: false` with `skipped: true`
   *   when no transport, sender or recipient is available; `success: false`
   *   with `error` when sending failed.
   */
  static async sendContractAnalysisCompletedEmail(
    input: ContractAnalysisEmailInput,
  ): Promise<{
    success: boolean;
    error?: string;
    skipped?: boolean;
    previewUrl?: string | null;
  }> {
    try {
      const mailer = await getTransporter();
      if (!mailer) {
        return {
          success: false,
          skipped: true,
          error: "Email service not configured",
        };
      }

      const from =
        process.env.MAIL_FROM?.trim() ||
        process.env.EMAIL_USER?.trim() ||
        (transportMode === "ethereal"
          ? "LexiChain <no-reply@ethereal.email>"
          : "");
      if (!from) {
        warnMissingEmailConfigOnce();
        return { success: false, skipped: true, error: "MAIL_FROM is missing" };
      }

      // Validate and send the same trimmed address.
      const to = input.to?.trim();
      if (!to) {
        return {
          success: false,
          skipped: true,
          error: "Recipient email is missing",
        };
      }

      const { blueprint, blockchain } = input;
      const esc = EmailService.escapeHtml;
      const recipientName = input.userDisplayName?.trim() || "there";
      const contractUrl = formatContractLink(input.contractId);
      const blockchainStatus = blockchain
        ? "Registered"
        : "Not registered (blockchain unavailable or skipped)";

      // Label/value pairs shared by the plain-text and HTML bodies.
      const blueprintRows: ReadonlyArray<readonly [string, string]> = [
        ["Contract title", input.contractTitle],
        ["Original file", input.contractFileName],
        ["Type", blueprint.type],
        ["Provider", blueprint.provider ?? "N/A"],
        ["Policy number", blueprint.policyNumber ?? "N/A"],
        ["Start date", formatDateValue(blueprint.startDate)],
        ["End date", formatDateValue(blueprint.endDate)],
        [
          "Premium",
          formatPremium(blueprint.premium, blueprint.premiumCurrency),
        ],
      ];
      const proofRows: ReadonlyArray<readonly [string, string]> = [
        ["Status", blockchainStatus],
        ["Document hash", blockchain?.documentHash ?? "N/A"],
        ["Transaction hash", blockchain?.txHash ?? "N/A"],
        ["Block number", String(blockchain?.blockNumber ?? "N/A")],
        ["Block time", blockchain?.blockTimestamp?.toISOString() ?? "N/A"],
        ["Network", blockchain?.network ?? "N/A"],
        ["Contract address", blockchain?.contractAddress ?? "N/A"],
      ];

      const toTextLine = ([label, value]: readonly [string, string]): string =>
        `- ${label}: ${value}`;
      // The link line is added conditionally instead of filtering falsy
      // entries, which would also strip the blank separator lines.
      const textBody = [
        `Hello ${recipientName},`,
        "",
        "Your contract analysis is complete.",
        "",
        "Blueprint:",
        ...blueprintRows.map(toTextLine),
        "",
        "Summary:",
        blueprint.summary,
        "",
        "Blockchain proof:",
        ...proofRows.map(toTextLine),
        `- Explorer URL: ${blockchain?.explorerUrl ?? "N/A"}`,
        "",
        ...(contractUrl ? [`Open in app: ${contractUrl}`, ""] : []),
        "Keep this email for your records.",
      ].join("\n");

      // Every dynamic value is escaped: titles, file names and the summary
      // originate from user uploads and must not inject markup.
      const toListItem = ([label, value]: readonly [string, string]): string =>
        `<li><strong>${label}:</strong> ${esc(value)}</li>`;
      const explorerHref = EmailService.toSafeHref(blockchain?.explorerUrl);
      const explorerHtml = explorerHref
        ? `<a href="${explorerHref}" target="_blank" rel="noopener noreferrer">Open transaction</a>`
        : esc(blockchain?.explorerUrl ?? "N/A");
      const summaryHtml = esc(blueprint.summary).replace(/\r?\n/g, "<br />");

      const htmlBody = `
<div style="font-family: Arial, sans-serif; line-height: 1.5; color: #0f172a;">
<h2 style="margin-bottom: 12px;">Contract Analysis Completed</h2>
<p>Hello ${esc(recipientName)},</p>
<p>Your contract analysis has been completed successfully.</p>
<h3 style="margin-top: 24px; margin-bottom: 8px;">Blueprint</h3>
<ul>
${blueprintRows.map(toListItem).join("\n")}
</ul>
<h3 style="margin-top: 24px; margin-bottom: 8px;">Summary</h3>
<p>${summaryHtml}</p>
<h3 style="margin-top: 24px; margin-bottom: 8px;">Blockchain Proof</h3>
<ul>
${proofRows.map(toListItem).join("\n")}
<li><strong>Explorer URL:</strong> ${explorerHtml}</li>
</ul>
${contractUrl ? `<p><a href="${esc(contractUrl)}">Open this contract in your dashboard</a></p>` : ""}
<p style="margin-top: 24px; font-size: 12px; color: #475569;">Keep this email for your records.</p>
</div>
`;

      const info = await mailer.sendMail({
        from,
        to,
        subject: `Contract analyzed: ${input.contractTitle}`,
        text: textBody,
        html: htmlBody,
      });

      // getTestMessageUrl yields false for non-Ethereal transports; map that
      // to null to honour the declared result type.
      const previewUrl = nodemailer.getTestMessageUrl(info) || null;
      if (previewUrl) {
        console.log(`📨 Ethereal preview URL: ${previewUrl}`);
      }
      return { success: true, previewUrl };
    } catch (error) {
      console.error("Failed to send analysis completion email:", error);
      return {
        success: false,
        error: error instanceof Error ? error.message : "Unknown email error",
      };
    }
  }
}