Files
LexiChain/test-mistral.js
2026-05-03 13:26:31 +01:00

101 lines
5.5 KiB
JavaScript

const fs = require('fs');
/**
 * System prompt for the contract-extraction request.
 *
 * Passed as the `system` message of the chat-completions call in this script.
 * It tells the model to answer with JSON only, to take every value from the
 * supplied document text (null / [] when absent), and spells out the expected
 * JSON schema together with per-field formatting rules.
 *
 * NOTE: this is a runtime string; any edit changes what is sent to the model.
 */
const sysPrompt = `You are an expert contract analysis engine for the BFSI (Banking, Financial Services, and Insurance) sector.
You receive the full text content of a contract document and must extract structured information from it.
ABSOLUTE RULES — VIOLATION OF THESE IS A CRITICAL FAILURE:
1. Return ONLY valid, parseable JSON — no markdown, no backticks, no explanations, no commentary.
2. EVERY value you output MUST come directly from the document text provided to you.
3. If a piece of information does NOT exist in the document text, you MUST use null (for strings/numbers) or [] (for arrays). NEVER invent, assume, or guess data.
4. Do NOT copy example values from the schema description — they are placeholders, not real data.
5. The "extractedText" field MUST contain actual verbatim text from the document — not a summary, not examples.
JSON SCHEMA (use exact field names):
{
"language": "<ISO 639-1 code detected from document>",
"title": "<exact contract title from document or null>",
"type": "<one of: INSURANCE_AUTO, INSURANCE_HOME, INSURANCE_HEALTH, INSURANCE_LIFE, LOAN, CREDIT_CARD, INVESTMENT, OTHER>",
"provider": "<company/institution name from document or null>",
"policyNumber": "<policy/contract number from document or null>",
"startDate": "<YYYY-MM-DD from document or null>",
"endDate": "<YYYY-MM-DD from document or null>",
"premium": <number from document or null — NO currency symbols>,
"premiumCurrency": "<currency code from document or null>",
"summary": "<4-6 sentences summarizing the actual contract content>",
"keyPoints": {
"guarantees": ["<actual guarantee from document>"],
"exclusions": ["<actual exclusion from document>"],
"franchise": "<deductible/penalty from document or null>",
"importantDates": ["<actual date from document with description>"],
"explainability": [
{
"field": "<field name>",
"why": "<why this value was extracted>",
"sourceSnippet": "<verbatim quote from document>",
"sourceHints": { "page": "<page or null>", "section": "<section or null>", "confidence": <0-100> }
}
]
},
"keyPeople": [{"name": "<from document>", "role": "<from document or null>", "email": "<from document or null>", "phone": "<from document or null>"}],
"contactInfo": {"name": "<from document or null>", "email": null, "phone": null, "address": null, "role": null},
"importantContacts": [],
"relevantDates": [{"date": "<YYYY-MM-DD>", "description": "<from document>", "type": "<EXPIRATION|RENEWAL|PAYMENT|REVIEW|OTHER>"}],
"extractedText": "<verbatim text from the document, max 12000 chars>",
"contractValidation": {
"isValidContract": true,
"confidence": <0-100 reflecting how much data you actually found>,
"reason": null
}
}
FIELD RULES:
- All dates: ISO YYYY-MM-DD or null
- premium: positive number or null — NO currency symbols, NO text
- type: must be exactly one of the 8 values listed
- summary: 4-6 professional sentences about THIS specific contract
- extractedText: must contain at least 30 characters of ACTUAL document content
- explainability: at least 4 items with real sourceSnippets from the document
- confidence: reflects how much data you actually found (not how confident the model is)
- Parse localized number formats correctly (1.234,56 vs 1,234.56)
- Detect the contract language and set "language" accordingly
You are replacing a more capable multimodal model (Gemini) as a fallback. Your output quality MUST match production standards. ACCURACY is more important than completeness — it is better to return null than to guess.`;
/**
 * User message for the contract-extraction request.
 *
 * Contains a short sample confidentiality agreement wrapped in BEGIN/END
 * "GROUNDED DOCUMENT TEXT" markers, followed by the "MISTRAL FALLBACK RULES"
 * that restate the extraction constraints. Passed as the `user` message of
 * the chat-completions call in this script.
 *
 * NOTE: this is a runtime string; any edit changes what is sent to the model.
 */
const prompt = `--- BEGIN GROUNDED DOCUMENT TEXT (AUTHORITATIVE SOURCE) ---
CONFIDENTIALITY AGREEMENT
This Confidentiality Agreement (the "Agreement") is entered into as of May 1, 2025 (the "Effective Date"), by and between Acme Corp ("Disclosing Party") and Beta Inc ("Receiving Party").
1. Confidential Information. "Confidential Information" means all non-public information disclosed by the Disclosing Party to the Receiving Party.
2. Obligations. The Receiving Party shall hold and maintain the Confidential Information in strictest confidence.
3. Term. This Agreement shall remain in effect for a period of two (2) years from the Effective Date.
Signatures:
John Doe, CEO Acme Corp
Jane Smith, VP Beta Inc
--- END GROUNDED DOCUMENT TEXT ---
MISTRAL FALLBACK RULES:
- Extract fields ONLY from the grounded document text above. This text is the full contract content.
- Do not invent, assume, or hallucinate any values not explicitly present in the above text.
- If a field's data is not found in the text, use null (for strings/numbers) or [] (for arrays).
- Dates: convert any date format found in the text to YYYY-MM-DD.
- Numbers: parse localized formats (comma vs period) correctly before setting numeric fields.
- contractValidation.confidence should reflect how much data you could extract from the text.
IMPORTANT: Return ONLY valid JSON and preserve the required schema exactly. Do not add any text outside of the JSON object.`;
/**
 * Sends `sysPrompt` (system message) and `prompt` (user message) to the
 * Mistral chat-completions endpoint and prints the model's reply to stdout.
 *
 * The API key is read from the MISTRAL_API_KEY environment variable so that
 * no credential lives in source control.
 * SECURITY: this script used to embed a literal bearer token in the
 * Authorization header; treat that token as compromised and revoke/rotate it.
 *
 * @returns {Promise<void>} Resolves once the reply has been printed.
 * @throws {Error} If MISTRAL_API_KEY is unset, the HTTP status is not 2xx, or
 *   the response body has no `choices[0].message.content`.
 */
async function runMistralRequest() {
  const apiKey = process.env.MISTRAL_API_KEY;
  if (!apiKey) {
    throw new Error('MISTRAL_API_KEY environment variable is not set');
  }

  const response = await fetch('https://api.mistral.ai/v1/chat/completions', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'mistral-large-latest',
      // Sampling settings sent with the request: temperature 0, top_p 1.
      temperature: 0,
      top_p: 1,
      response_format: { type: 'json_object' },
      messages: [
        { role: 'system', content: sysPrompt },
        { role: 'user', content: prompt },
      ],
    }),
  });

  // Surface API errors (bad key, rate limit, ...) with their body instead of
  // failing later with an opaque TypeError on `data.choices[0]`.
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(
      `Mistral API request failed: ${response.status} ${response.statusText} - ${detail}`,
    );
  }

  const data = await response.json();
  const content = data.choices?.[0]?.message?.content;
  if (content == null) {
    throw new Error(`Unexpected Mistral response shape: ${JSON.stringify(data)}`);
  }

  console.log(content);
}

runMistralRequest().catch((err) => {
  console.error(err);
  // Signal failure to the shell / CI instead of exiting with status 0.
  process.exitCode = 1;
});