// Source listing header (from the file viewer): JavaScript, 101 lines, 5.5 KiB.
const fs = require('fs');
// NOTE(review): `fs` is not referenced anywhere in the visible script — confirm before removing.

/**
 * System prompt for the contract-extraction request.
 *
 * It tells the model to return only JSON that follows the schema below and to
 * use null / [] for anything that is not present in the document text.
 *
 * The text is sent to the model verbatim, so every character inside the
 * template literal is part of the prompt. Stray "|" separator lines that had
 * been interleaved with the prompt text (including inside the JSON schema)
 * were removed; the wording itself is unchanged.
 */
const sysPrompt = `You are an expert contract analysis engine for the BFSI (Banking, Financial Services, and Insurance) sector.
You receive the full text content of a contract document and must extract structured information from it.

ABSOLUTE RULES — VIOLATION OF THESE IS A CRITICAL FAILURE:
1. Return ONLY valid, parseable JSON — no markdown, no backticks, no explanations, no commentary.
2. EVERY value you output MUST come directly from the document text provided to you.
3. If a piece of information does NOT exist in the document text, you MUST use null (for strings/numbers) or [] (for arrays). NEVER invent, assume, or guess data.
4. Do NOT copy example values from the schema description — they are placeholders, not real data.
5. The "extractedText" field MUST contain actual verbatim text from the document — not a summary, not examples.

JSON SCHEMA (use exact field names):
{
"language": "<ISO 639-1 code detected from document>",
"title": "<exact contract title from document or null>",
"type": "<one of: INSURANCE_AUTO, INSURANCE_HOME, INSURANCE_HEALTH, INSURANCE_LIFE, LOAN, CREDIT_CARD, INVESTMENT, OTHER>",
"provider": "<company/institution name from document or null>",
"policyNumber": "<policy/contract number from document or null>",
"startDate": "<YYYY-MM-DD from document or null>",
"endDate": "<YYYY-MM-DD from document or null>",
"premium": <number from document or null — NO currency symbols>,
"premiumCurrency": "<currency code from document or null>",
"summary": "<4-6 sentences summarizing the actual contract content>",
"keyPoints": {
"guarantees": ["<actual guarantee from document>"],
"exclusions": ["<actual exclusion from document>"],
"franchise": "<deductible/penalty from document or null>",
"importantDates": ["<actual date from document with description>"],
"explainability": [
{
"field": "<field name>",
"why": "<why this value was extracted>",
"sourceSnippet": "<verbatim quote from document>",
"sourceHints": { "page": "<page or null>", "section": "<section or null>", "confidence": <0-100> }
}
]
},
"keyPeople": [{"name": "<from document>", "role": "<from document or null>", "email": "<from document or null>", "phone": "<from document or null>"}],
"contactInfo": {"name": "<from document or null>", "email": null, "phone": null, "address": null, "role": null},
"importantContacts": [],
"relevantDates": [{"date": "<YYYY-MM-DD>", "description": "<from document>", "type": "<EXPIRATION|RENEWAL|PAYMENT|REVIEW|OTHER>"}],
"extractedText": "<verbatim text from the document, max 12000 chars>",
"contractValidation": {
"isValidContract": true,
"confidence": <0-100 reflecting how much data you actually found>,
"reason": null
}
}

FIELD RULES:
- All dates: ISO YYYY-MM-DD or null
- premium: positive number or null — NO currency symbols, NO text
- type: must be exactly one of the 8 values listed
- summary: 4-6 professional sentences about THIS specific contract
- extractedText: must contain at least 30 characters of ACTUAL document content
- explainability: at least 4 items with real sourceSnippets from the document
- confidence: reflects how much data you actually found (not how confident the model is)
- Parse localized number formats correctly (1.234,56 vs 1,234.56)
- Detect the contract language and set "language" accordingly

You are replacing a more capable multimodal model (Gemini) as a fallback. Your output quality MUST match production standards. ACCURACY is more important than completeness — it is better to return null than to guess.`;

/**
 * User message for the extraction request: a sample confidentiality agreement
 * wrapped in BEGIN/END markers, followed by the fallback extraction rules.
 *
 * The text is sent to the model verbatim. Stray "|" separator lines that had
 * been interleaved with the document text and the rules were removed so the
 * model only sees the contract and the instructions; the wording itself is
 * unchanged.
 */
const prompt = `--- BEGIN GROUNDED DOCUMENT TEXT (AUTHORITATIVE SOURCE) ---
CONFIDENTIALITY AGREEMENT
This Confidentiality Agreement (the "Agreement") is entered into as of May 1, 2025 (the "Effective Date"), by and between Acme Corp ("Disclosing Party") and Beta Inc ("Receiving Party").
1. Confidential Information. "Confidential Information" means all non-public information disclosed by the Disclosing Party to the Receiving Party.
2. Obligations. The Receiving Party shall hold and maintain the Confidential Information in strictest confidence.
3. Term. This Agreement shall remain in effect for a period of two (2) years from the Effective Date.
Signatures:
John Doe, CEO Acme Corp
Jane Smith, VP Beta Inc
--- END GROUNDED DOCUMENT TEXT ---

MISTRAL FALLBACK RULES:
- Extract fields ONLY from the grounded document text above. This text is the full contract content.
- Do not invent, assume, or hallucinate any values not explicitly present in the above text.
- If a field's data is not found in the text, use null (for strings/numbers) or [] (for arrays).
- Dates: convert any date format found in the text to YYYY-MM-DD.
- Numbers: parse localized formats (comma vs period) correctly before setting numeric fields.
- contractValidation.confidence should reflect how much data you could extract from the text.
IMPORTANT: Return ONLY valid JSON and preserve the required schema exactly. Do not add any text outside of the JSON object.`;

/**
 * Send the system and user prompts to the Mistral chat-completions endpoint
 * and print the content of the first returned choice to stdout.
 *
 * The API key is read from the MISTRAL_API_KEY environment variable so that
 * no credential lives in source control.
 * NOTE(review): a bearer token used to be hard-coded in this request; it must
 * be treated as leaked and rotated.
 *
 * @returns {Promise<void>} resolves once the reply has been printed
 * @throws {Error} if the API key is missing, the HTTP status is not 2xx, or
 *   the response body does not contain `choices[0].message.content`
 */
async function main() {
  const apiKey = process.env.MISTRAL_API_KEY;
  if (!apiKey) {
    throw new Error('MISTRAL_API_KEY environment variable is not set');
  }

  const response = await fetch('https://api.mistral.ai/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'mistral-large-latest',
      temperature: 0,
      top_p: 1,
      response_format: { type: 'json_object' },
      messages: [
        { role: 'system', content: sysPrompt },
        { role: 'user', content: prompt },
      ],
    }),
  });

  // Error responses (401, 429, 5xx, ...) carry no `choices`; surface the
  // status and body instead of failing later with an opaque TypeError.
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(
      `Mistral API request failed: ${response.status} ${response.statusText} - ${detail}`,
    );
  }

  const data = await response.json();
  const content = data?.choices?.[0]?.message?.content;
  if (typeof content !== 'string') {
    throw new Error(`Unexpected Mistral response shape: ${JSON.stringify(data)}`);
  }

  console.log(content);
}

main().catch((err) => {
  console.error(err);
  // Signal failure to the calling shell / CI instead of exiting with 0.
  process.exitCode = 1;
});