PreRelease v2

This commit is contained in:
2026-03-28 23:46:45 +01:00
parent 6bf998a52a
commit 9993bd232f
39 changed files with 3964 additions and 1469 deletions

View File

@@ -0,0 +1,222 @@
import {
NormalizedAnalysis,
SUPPORTED_CONTRACT_TYPES,
SupportedContractType,
ContactInfo,
KeyPerson,
ExplainabilityItem,
} from "./analysis.types";
/**
 * Resolves a free-form contract type label to one of the supported type codes.
 *
 * The label is stringified, trimmed, upper-cased and has whitespace runs
 * collapsed to underscores before matching. Canonical codes win first, then a
 * small set of known synonyms; anything else resolves to "OTHER".
 *
 * @param rawType - Raw type value of unknown shape (null/undefined allowed).
 * @returns A supported contract type code, never null.
 */
function mapContractType(rawType: unknown): SupportedContractType {
  const label = rawType == null ? "" : String(rawType);
  const key = label.trim().toUpperCase().replace(/\s+/g, "_");

  // Already one of the canonical codes.
  const canonical = SUPPORTED_CONTRACT_TYPES.find((known) => known === key);
  if (canonical !== undefined) {
    return canonical;
  }

  // Known synonyms for canonical codes.
  switch (key) {
    case "AUTO_INSURANCE":
      return "INSURANCE_AUTO";
    case "HOME_INSURANCE":
      return "INSURANCE_HOME";
    case "HEALTH_INSURANCE":
      return "INSURANCE_HEALTH";
    case "LIFE_INSURANCE":
      return "INSURANCE_LIFE";
    case "MORTGAGE":
    case "CREDIT":
      return "LOAN";
    case "CARD_CREDIT":
      return "CREDIT_CARD";
    default:
      return "OTHER";
  }
}
/**
 * Converts a loosely-typed scalar into a trimmed, non-empty string.
 *
 * Only primitives (string, number, boolean, bigint) are stringified. Objects,
 * arrays, functions and symbols yield null, because `String()` on them
 * produces noise such as "[object Object]" that must not be stored as if it
 * were real text.
 *
 * @param value - Any value.
 * @returns The trimmed text, or null when the value is missing, blank, or not
 *   a primitive.
 */
function toStringOrNull(value: unknown): string | null {
  if (value === null || value === undefined) return null;

  const kind = typeof value;
  if (kind === "object" || kind === "function" || kind === "symbol") {
    return null;
  }

  const normalized = String(value).trim();
  return normalized.length > 0 ? normalized : null;
}
/**
 * Normalizes a currency indicator to an upper-case three-letter code.
 *
 * The symbols €, $ and £ map to EUR, USD and GBP. Any other value is accepted
 * only if, once trimmed and upper-cased, it consists of exactly three A-Z
 * letters; it is not checked against a list of real currencies.
 *
 * @param value - Raw currency value (symbol, code, or anything else).
 * @returns The three-letter code, or null when missing or unrecognized.
 */
function normalizeCurrency(value: unknown): string | null {
  const code = String(value ?? "")
    .trim()
    .toUpperCase();

  switch (code) {
    case "":
      return null;
    case "€":
      return "EUR";
    case "$":
      return "USD";
    case "£":
      return "GBP";
    default:
      // Accept ISO-like 3-letter currencies and common BFSI currencies.
      return /^[A-Z]{3}$/.test(code) ? code : null;
  }
}
/** Matches a leading ISO calendar date, alone or followed by a time part. */
const ISO_DATE_PREFIX = /^(\d{4})-(\d{2})-(\d{2})(?:$|[T\s])/;

/** Detects an explicit GMT/UTC marker, or a trailing Z / numeric offset after a time. */
const EXPLICIT_ZONE =
  /\b(?:GMT|UTC)\b|\d:\d{2}(?::\d{2}(?:\.\d+)?)?\s*(?:Z|[+-]\d{2}(?::?\d{2})?)$/i;

/**
 * Formats calendar components as YYYY-MM-DD after checking they form a real date.
 * Validation is done by hand (not via `Date`) so that year 0000 stays usable.
 */
function formatCalendarDate(
  year: number,
  month: number,
  day: number,
): string | null {
  if (!Number.isInteger(year) || year < 0 || year > 9999) return null;
  if (!Number.isInteger(month) || month < 1 || month > 12) return null;
  if (!Number.isInteger(day) || day < 1) return null;

  const isLeapYear = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
  const daysPerMonth = [31, isLeapYear ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
  if (day > (daysPerMonth[month - 1] ?? 0)) return null;

  const pad = (n: number, width: number): string => String(n).padStart(width, "0");
  return `${pad(year, 4)}-${pad(month, 2)}-${pad(day, 2)}`;
}

/**
 * Normalizes a loosely formatted date value to an ISO `YYYY-MM-DD` string.
 *
 * - Values that start with an ISO date (optionally followed by a time) keep
 *   the date exactly as written, provided it is a real calendar date
 *   (e.g. "2024-02-30" is rejected).
 * - Other strings are handed to the `Date` parser. Strings without an explicit
 *   zone are interpreted by the engine in local time, so the local calendar
 *   components are used; formatting them in UTC would shift the day in zones
 *   east of UTC. Strings with an explicit GMT/UTC/offset marker are reported
 *   as their UTC calendar date.
 *
 * @param value - Raw date value of unknown shape.
 * @returns The normalized date, or null when missing, unparseable or invalid.
 */
function toDateOrNull(value: unknown): string | null {
  const candidate = String(value ?? "").trim();
  if (!candidate) return null;

  const iso = ISO_DATE_PREFIX.exec(candidate);
  if (iso) {
    // A trailing time part must still be parseable as a whole.
    const hasTimePart = candidate.length > 10;
    if (hasTimePart && Number.isNaN(new Date(candidate).getTime())) {
      return null;
    }
    return formatCalendarDate(Number(iso[1]), Number(iso[2]), Number(iso[3]));
  }

  const parsed = new Date(candidate);
  if (Number.isNaN(parsed.getTime())) return null;

  if (EXPLICIT_ZONE.test(candidate)) {
    return formatCalendarDate(
      parsed.getUTCFullYear(),
      parsed.getUTCMonth() + 1,
      parsed.getUTCDate(),
    );
  }
  return formatCalendarDate(
    parsed.getFullYear(),
    parsed.getMonth() + 1,
    parsed.getDate(),
  );
}
/**
 * Converts an array of loosely-typed scalars into a list of trimmed,
 * non-empty strings, keeping at most the first 25 usable entries.
 *
 * Non-primitive items (objects, arrays, functions, symbols) are skipped
 * instead of being stringified to noise such as "[object Object]".
 *
 * @param value - Expected to be an array; anything else yields an empty list.
 * @returns Up to 25 cleaned strings, in their original order.
 */
function toStringList(value: unknown): string[] {
  if (!Array.isArray(value)) return [];

  const maxItems = 25;
  const cleaned: string[] = [];
  for (const item of value) {
    if (cleaned.length >= maxItems) break;
    if (item === null || item === undefined) continue;

    const kind = typeof item;
    if (kind === "object" || kind === "function" || kind === "symbol") continue;

    const text = String(item).trim();
    if (text.length > 0) cleaned.push(text);
  }
  return cleaned;
}
/**
 * Builds a ContactInfo record from a loosely-typed object.
 *
 * Each field is read defensively (a null/undefined input is tolerated) and
 * passed through `toStringOrNull`, so every field is always present on the
 * result, holding either cleaned text or null.
 *
 * @param input - Raw contact object of unknown shape.
 * @returns A ContactInfo with all five fields populated (possibly with null).
 */
function parseContactInfo(input: any): ContactInfo {
  const name = toStringOrNull(input?.name);
  const email = toStringOrNull(input?.email);
  const phone = toStringOrNull(input?.phone);
  const address = toStringOrNull(input?.address);
  const role = toStringOrNull(input?.role);

  return { name, email, phone, address, role };
}
/**
 * Builds the list of key people from a loosely-typed array.
 *
 * Only the first 10 entries are considered. A missing or blank name is
 * replaced by "Unknown"; the other fields are cleaned text or null.
 *
 * @param input - Expected to be an array; anything else yields an empty list.
 * @returns At most 10 KeyPerson records.
 */
function parseKeyPeople(input: any): KeyPerson[] {
  if (!Array.isArray(input)) {
    return [];
  }

  const toKeyPerson = (entry: any): KeyPerson => {
    const displayName = String(entry?.name ?? "").trim();
    return {
      name: displayName || "Unknown",
      role: toStringOrNull(entry?.role),
      email: toStringOrNull(entry?.email),
      phone: toStringOrNull(entry?.phone),
    };
  };

  return input.slice(0, 10).map((entry) => toKeyPerson(entry));
}
/**
 * Builds the list of relevant dates from a loosely-typed array.
 *
 * Only the first 15 entries are considered. For each entry:
 * - `date` is normalized with `toDateOrNull`; when that fails the placeholder
 *   "0000-01-01" is used.
 * - `description` is trimmed and cut to 200 characters, defaulting to
 *   "Important date" when empty.
 * - `type` is upper-cased and must be one of the known kinds, else "OTHER".
 *
 * @param input - Expected to be an array; anything else yields an empty list.
 * @returns At most 15 normalized date records.
 */
function parseRelevantDates(input: any): Array<{
  date: string;
  description: string;
  type: "EXPIRATION" | "RENEWAL" | "PAYMENT" | "REVIEW" | "OTHER";
}> {
  type DateKind = "EXPIRATION" | "RENEWAL" | "PAYMENT" | "REVIEW" | "OTHER";
  const knownKinds: readonly DateKind[] = [
    "EXPIRATION",
    "RENEWAL",
    "PAYMENT",
    "REVIEW",
    "OTHER",
  ];

  const toKind = (raw: unknown): DateKind => {
    const upper = String(raw ?? "OTHER").toUpperCase();
    return knownKinds.find((kind) => kind === upper) ?? "OTHER";
  };

  if (!Array.isArray(input)) {
    return [];
  }

  return input.slice(0, 15).map((entry) => {
    const isoDate = toDateOrNull(entry?.date);
    const kind = toKind(entry?.type);
    const label = String(entry?.description ?? "")
      .trim()
      .slice(0, 200);

    return {
      date: isoDate || "0000-01-01",
      description: label || "Important date",
      type: kind,
    };
  });
}
/**
 * Builds the explainability list from a loosely-typed array.
 *
 * Only the first 30 entries are considered. An entry is kept only when
 * `field`, `why` and `sourceSnippet` are all non-empty after trimming; they
 * are then cut to 80, 260 and 480 characters respectively.
 *
 * `sourceHints.confidence` is accepted only as a number or a non-blank
 * numeric string, then rounded and clamped to 0..100. Anything else (null,
 * "", booleans, arrays, ...) yields null: `Number()` would coerce several of
 * those to 0, which would wrongly report "no information" as "zero
 * confidence".
 *
 * @param input - Expected to be an array; anything else yields an empty list.
 * @returns The valid explainability items, in their original order.
 */
function parseExplainability(input: any): ExplainabilityItem[] {
  if (!Array.isArray(input)) return [];

  const items: ExplainabilityItem[] = [];
  for (const item of input.slice(0, 30)) {
    const field = String(item?.field ?? "").trim();
    const why = String(item?.why ?? "").trim();
    const sourceSnippet = String(item?.sourceSnippet ?? "").trim();
    if (!field || !why || !sourceSnippet) continue;

    const rawConfidence: unknown = item?.sourceHints?.confidence;
    let numericConfidence = Number.NaN;
    if (typeof rawConfidence === "number") {
      numericConfidence = rawConfidence;
    } else if (typeof rawConfidence === "string" && rawConfidence.trim() !== "") {
      numericConfidence = Number(rawConfidence);
    }
    const confidence = Number.isFinite(numericConfidence)
      ? Math.max(0, Math.min(100, Math.round(numericConfidence)))
      : null;

    const normalized: ExplainabilityItem = {
      field: field.slice(0, 80),
      why: why.slice(0, 260),
      sourceSnippet: sourceSnippet.slice(0, 480),
      sourceHints: {
        page: toStringOrNull(item?.sourceHints?.page),
        section: toStringOrNull(item?.sourceHints?.section),
        confidence,
      },
    };
    items.push(normalized);
  }
  return items;
}
/**
 * Parses a raw premium value into a non-negative amount rounded to 2 decimals.
 *
 * Only numbers and non-blank numeric strings are accepted. `Number()` would
 * coerce "", [] and false to 0 (and true to 1), inventing a premium that the
 * source never stated, so those inputs yield null instead.
 */
function parsePremium(raw: unknown): number | null {
  let amount: number;
  if (typeof raw === "number") {
    amount = raw;
  } else if (typeof raw === "string" && raw.trim().length > 0) {
    amount = Number(raw);
  } else {
    return null;
  }

  if (!Number.isFinite(amount) || amount < 0) return null;
  return Number(amount.toFixed(2));
}

/**
 * Validates and normalizes a raw analysis payload into a NormalizedAnalysis.
 *
 * - `title` falls back to "Untitled Contract"; `language` falls back to "en".
 * - `extractedText` is capped at 12000 characters.
 * - `importantContacts` keeps at most 10 entries.
 * - All other fields go through the dedicated normalizers of this module.
 *
 * @param input - Raw analysis object of unknown shape.
 * @returns The normalized analysis.
 * @throws Error when the trimmed summary is shorter than 10 characters or the
 *   trimmed extracted text is shorter than 30 characters.
 */
export function normalizeAnalysis(input: any): NormalizedAnalysis {
  const title = String(input?.title || "").trim() || "Untitled Contract";
  const summary = String(input?.summary || "").trim();
  const extractedText = String(input?.extractedText || "").trim();

  if (summary.length < 10) {
    throw new Error("Summary is missing or too short.");
  }
  if (extractedText.length < 30) {
    throw new Error("Extracted text is missing or too short.");
  }

  const premium = parsePremium(input?.premium);
  const language = toStringOrNull(input?.language) || "en";

  return {
    title,
    type: mapContractType(input?.type),
    provider: toStringOrNull(input?.provider),
    policyNumber: toStringOrNull(input?.policyNumber),
    startDate: toDateOrNull(input?.startDate),
    endDate: toDateOrNull(input?.endDate),
    premium,
    premiumCurrency: normalizeCurrency(input?.premiumCurrency),
    summary,
    keyPoints: {
      guarantees: toStringList(input?.keyPoints?.guarantees),
      exclusions: toStringList(input?.keyPoints?.exclusions),
      franchise: toStringOrNull(input?.keyPoints?.franchise),
      importantDates: toStringList(input?.keyPoints?.importantDates),
      explainability: parseExplainability(input?.keyPoints?.explainability),
    },
    extractedText: extractedText.slice(0, 12000),
    language,
    keyPeople: parseKeyPeople(input?.keyPeople),
    contactInfo: parseContactInfo(input?.contactInfo),
    importantContacts: Array.isArray(input?.importantContacts)
      ? input.importantContacts
          .slice(0, 10)
          .map((c: any) => parseContactInfo(c))
      : [],
    relevantDates: parseRelevantDates(input?.relevantDates),
  };
}

View File

@@ -0,0 +1,110 @@
/**
 * Removes a surrounding Markdown code fence (``` or ```json) from a string.
 *
 * The input is trimmed first: the fence patterns are anchored to the very
 * start and end of the string, so leading whitespace or a trailing newline
 * would otherwise leave the fences in place.
 *
 * @param value - Raw text, possibly wrapped in a code fence.
 * @returns The text without the opening/closing fence, trimmed.
 */
function stripMarkdownFences(value: string): string {
  return value
    .trim()
    .replace(/^```json\s*/i, "")
    .replace(/^```\s*/i, "")
    .replace(/\s*```$/, "")
    .trim();
}
/**
 * Extracts the first bracket-balanced `{...}` or `[...]` span from a text.
 *
 * Scanning starts at the first opening brace/bracket. Brackets inside
 * double-quoted strings are ignored and backslash escapes inside strings are
 * honoured. The span is not validated as JSON; only nesting is checked.
 *
 * @param text - Text that may contain a JSON value surrounded by other content.
 * @returns The balanced span, or null when there is no opening bracket, a
 *   closing bracket does not match the innermost opener, or the text ends
 *   before the span is closed.
 */
function extractBalancedJson(text: string): string | null {
  const begin = text.search(/[{\[]/);
  if (begin < 0) {
    return null;
  }

  const closerFor = (opener: string): string => (opener === "{" ? "}" : "]");

  // Stack of closers still expected, innermost last.
  const pending: string[] = [closerFor(text.charAt(begin))];
  let insideString = false;
  let skipNext = false;

  for (let pos = begin + 1; pos < text.length; pos += 1) {
    const ch = text.charAt(pos);

    if (insideString) {
      if (skipNext) {
        skipNext = false;
      } else if (ch === "\\") {
        skipNext = true;
      } else if (ch === '"') {
        insideString = false;
      }
      continue;
    }

    switch (ch) {
      case '"':
        insideString = true;
        break;
      case "{":
      case "[":
        pending.push(closerFor(ch));
        break;
      case "}":
      case "]":
        if (pending.pop() !== ch) {
          return null;
        }
        if (pending.length === 0) {
          return text.slice(begin, pos + 1);
        }
        break;
      default:
        break;
    }
  }

  return null;
}
/**
 * Applies textual repairs to almost-valid JSON.
 *
 * Curly double quotes become straight double quotes, curly single quotes
 * become straight apostrophes, and a comma directly before a closing brace or
 * bracket (ignoring whitespace) is removed. The replacements are purely
 * textual and also apply inside string values.
 *
 * @param value - JSON-like text.
 * @returns The repaired text, trimmed.
 */
function sanitizeLooseJson(value: string): string {
  const straightDoubleQuotes = value.replace(/[\u201C\u201D]/g, '"');
  const straightSingleQuotes = straightDoubleQuotes.replace(/[\u2018\u2019]/g, "'");
  const withoutTrailingCommas = straightSingleQuotes.replace(/,\s*([}\]])/g, "$1");
  return withoutTrailingCommas.trim();
}
/**
 * Parses an AI text response as JSON, with progressively looser fallbacks.
 *
 * Attempts, in order:
 * 1. the response with Markdown fences stripped;
 * 2. the first bracket-balanced span found in it;
 * 3. that span after `sanitizeLooseJson` repairs.
 *
 * @param text - Raw response text.
 * @returns The parsed value; its shape is not validated here.
 * @throws Error when the response is empty or not a string, when no balanced
 *   JSON span can be found, or when every parse attempt fails.
 */
export function parseJsonResponse(text: string): unknown {
  if (typeof text !== "string" || text.trim().length === 0) {
    throw new Error("AI response is empty.");
  }

  // Wraps the parsed value so that a legitimate `null` result stays
  // distinguishable from a failed parse.
  const tryParse = (candidate: string): { value: unknown } | undefined => {
    try {
      return { value: JSON.parse(candidate) };
    } catch {
      return undefined;
    }
  };

  const body = stripMarkdownFences(text);
  const direct = tryParse(body);
  if (direct !== undefined) {
    return direct.value;
  }

  const balanced = extractBalancedJson(body);
  if (!balanced) {
    throw new Error(
      `No complete JSON object found in AI response. Preview: ${body.slice(0, 220)}`,
    );
  }

  const strict = tryParse(balanced);
  if (strict !== undefined) {
    return strict.value;
  }

  const repaired = sanitizeLooseJson(balanced);
  try {
    return JSON.parse(repaired);
  } catch (error) {
    const reason =
      error instanceof Error ? error.message : "unknown parse error";
    throw new Error(
      `JSON parse failed after recovery attempts: ${reason}. Preview: ${repaired.slice(0, 220)}`,
    );
  }
}

View File

@@ -0,0 +1,165 @@
/**
 * Builds the instruction prompt for the full contract analysis.
 *
 * The prompt describes the exact JSON structure expected in the response and
 * the extraction rules for each field. `fileName` (default "Unknown") and
 * `adaptiveContext` (default empty) are interpolated verbatim near the top.
 *
 * The prompt text is kept self-consistent:
 * - recurring dates use the canonical "1970-01-15" form everywhere (JSON
 *   example, rule 5 and the Field Type Rules);
 * - confidence is described as 0-100 everywhere.
 *
 * @param input - Optional file name and extra context to embed in the prompt.
 * @returns The complete prompt text.
 */
export function buildAnalysisPrompt(input?: {
  adaptiveContext?: string;
  fileName?: string;
}): string {
  return `You are an expert in contract analysis for BFSI and general legal/business contracts.
You support multi-language analysis and will automatically detect the contract language.
Document name: ${input?.fileName ?? "Unknown"}
${input?.adaptiveContext ?? ""}
Analyze this contract document completely and return JSON in the EXACT structure below.
CRITICAL: Your response must be VALID, PARSEABLE JSON only. Do not include markdown, backticks, or explanations.
{
"language": "en",
"title": "Descriptive contract title",
"type": "INSURANCE_AUTO",
"provider": "Company or institution name",
"policyNumber": "Policy/contract/reference number",
"startDate": "2024-01-01",
"endDate": "2025-12-31",
"premium": 1200.50,
"premiumCurrency": "TND",
"summary": "Professional, comprehensive 4-6 sentence summary in the contract's language. Include: main parties, key obligations, coverage/benefits, exclusions, important deadlines, key contacts. Use **bold** for: names, numbers, dates, amounts, important terms.",
"keyPoints": {
"guarantees": ["**Main Benefit 1**: Description", "**Main Benefit 2**: Description"],
"exclusions": ["**Exclusion 1**: Description with impact", "**Exclusion 2**: Description"],
"franchise": "**Deductible/Penalty**: €150 per claim or equivalent",
"importantDates": ["**Renewal Date**: 31 December annually", "**Payment Deadline**: 15th of each month"],
"explainability": [
{
"field": "endDate",
"why": "Extracted as contract expiration because the clause explicitly sets validity end.",
"sourceSnippet": "Durée du prêt: échéance finale fixée au 10 avril 2044.",
"sourceHints": { "page": "1", "section": "Durée/Échéancier", "confidence": 92 }
},
{
"field": "premium",
"why": "Detected monetary obligation from insurance/fee clause.",
"sourceSnippet": "Coût total estimé de 18 240,00 TND.",
"sourceHints": { "page": "2", "section": "Coût / Prime", "confidence": 88 }
}
]
},
"keyPeople": [
{"name": "**John Smith**", "role": "Policy Holder", "email": "john@example.com", "phone": "+33612345678"},
{"name": "**Jane Doe**", "role": "Insurance Agent", "email": "jane@insurer.com", "phone": "+33987654321"}
],
"contactInfo": {
"name": "**Policy Holder Name**",
"email": "holder@email.com",
"phone": "+33612345678",
"address": "123 Main Street, City, Postal Code",
"role": "Insured Person"
},
"importantContacts": [
{"name": "**Claims Department**", "email": "claims@insurer.com", "phone": "+33800000000"},
{"name": "**Customer Service**", "email": "support@insurer.com", "phone": "+33800111111"}
],
"relevantDates": [
{"date": "2025-12-31", "description": "**Policy Expiration Date**", "type": "EXPIRATION"},
{"date": "2025-10-31", "description": "**Renewal Notice Deadline** (60 days before expiration)", "type": "RENEWAL"},
{"date": "1970-01-15", "description": "**Monthly Payment Due Date**", "type": "PAYMENT"}
],
"extractedText": "Most relevant extracted text, preserving original structure and keywords. Include key clauses, definitions, obligations. Max 12000 chars.",
"contractValidation": {
"isValidContract": true,
"confidence": 88,
"reason": null
}
}
TYPE must be one of:
INSURANCE_AUTO, INSURANCE_HOME, INSURANCE_HEALTH, INSURANCE_LIFE, LOAN, CREDIT_CARD, INVESTMENT, OTHER
CRITICAL FIELD EXTRACTION RULES:
1. **Language Detection**: Detect and return the contract's primary language (en, fr, de, es, it, pt, etc.). If mixed, return dominant language.
2. **Summary (VERY IMPORTANT)**:
- Write 4-6 comprehensive sentences covering: parties involved, contract scope, key obligations, main coverage/benefits, critical exclusions, important deadlines
- Use **Party Name** for persons/entities mentioned
- Use **number** for all quantities, dates, amounts, percentages
- Use **YYYY-MM-DD** format for dates with **bold**
- Language: Professional business French, English, or contract's native language
- MUST be detailed enough that reader understands contract without opening PDF
3. **Key People Extraction**:
- Extract all named individuals: policy holders, insured parties, beneficiaries, signatories, agents, brokers
- Include roles, contact methods when visible in contract
- Use **bold** for names: {"name": "**John Smith**", ...}
4. **Contact Information**:
- contactInfo: Details of PRIMARY policy holder or contract party
- importantContacts: Agent, broker, support teams, claims department with **bold** for names
5. **Relevant Dates**:
- Extract ALL dates with business meaning: expiration, renewal, payment due dates, review dates
- For recurring dates (monthly, annually): show pattern like "1970-01-15" for "15th of each month"
- Include type: EXPIRATION, RENEWAL, PAYMENT, REVIEW, or OTHER
- Each date must have clear **bold** description explaining its significance
6. **Key Points**:
- Use **bold** for: benefit names, exclusion types, monetary amounts, coverage limits
- Example: "**Motor Coverage**: Collision and theft protection up to **€50,000**"
- Make exclusions explicit and impactful
- Include franchise/deductible with bold currency and amount
7. **Guarantees & Exclusions**:
- Be specific: "**Theft Coverage** includes keys, GPS, and aftermarket electronics"
- For exclusions, explain impact: "**Mechanical wear excluded** - means breakdowns in years 3+ not covered"
8. **Email/Phone Extraction**: If present in contract, extract:
- Email addresses in format: contact@domain.com
- Phone numbers with country code: +33 for France, +44 for UK, etc.
9. **Explainability (MANDATORY)**:
- In keyPoints.explainability, include at least 6 items for critical fields when available:
title, provider, policyNumber, startDate, endDate, premium, key obligations, key exclusions.
- Each item MUST contain:
- field: exact extracted field name
- why: one sentence explaining extraction logic
- sourceSnippet: short verbatim quote from document supporting the field
- sourceHints.page: page number if inferable, otherwise null
- sourceHints.section: section title/heading when inferable, otherwise null
- sourceHints.confidence: 0..100 confidence for that field extraction
- Keep sourceSnippet short (max 280 chars) but sufficiently specific to audit.
- Never invent snippet text not present in document.
Field Type Rules:
- dates: ISO format YYYY-MM-DD or null. For recurring patterns, use canonical date (e.g., "1970-01-15" for "15th each month")
- premium: Positive number or null. NO currency symbols and NO currency conversion.
- premiumCurrency: Use the exact currency mentioned in contract (e.g., TND, EUR, USD, MAD, DZD, GBP, CHF). Never convert currency.
- keyPeople, contactInfo arrays: Can include null values for missing fields
- type: MUST be one of the 8 contract types. Default to OTHER if unsure.
- confidence: 0-100, higher for clear data, lower for ambiguous/partial info
Validation:
- isValidContract: true for all actual contracts (even type=OTHER), false only for non-contract files
- reason: null if valid, brief explanation if invalid
MUST return VALID JSON parseable with JSON.parse() in ONE line of pure JSON.`;
}
/**
 * Builds the short prompt asking whether an uploaded document is a contract.
 *
 * The prompt requests a JSON object with `isValidContract`, `confidence` and
 * `reason`, and lists the rules for filling them in.
 *
 * @param fileName - Name of the uploaded file; "Unknown" when omitted.
 * @returns The complete prompt text, one instruction per line.
 */
export function buildPrevalidationPrompt(fileName?: string): string {
  const displayName = fileName ?? "Unknown";

  const promptLines: string[] = [
    "You are validating whether an uploaded document is a legal/financial contract in any language.",
    `File name: ${displayName}`,
    "Return ONLY JSON (no markdown, no backticks, no explanations):",
    "{",
    '"isValidContract": true,',
    '"confidence": 0,',
    '"reason": null',
    "}",
    "Rules:",
    "- isValidContract=false for invoices, receipts, identity cards, random photos/screenshots, blank pages, flyers, or unrelated files",
    "- confidence is an integer from 0 to 100",
    "- reason must be concise and user-friendly when invalid",
    "- If valid, reason can be null",
    "- This must be valid JSON parseable with JSON.parse()",
    "- Return ONLY the JSON object, nothing else",
  ];

  return promptLines.join("\n");
}

View File

@@ -0,0 +1,80 @@
/**
 * Closed list of contract type codes.
 * Declared `as const` so the literal values are preserved and the union type
 * below can be derived from it.
 */
export const SUPPORTED_CONTRACT_TYPES = [
"INSURANCE_AUTO",
"INSURANCE_HOME",
"INSURANCE_HEALTH",
"INSURANCE_LIFE",
"LOAN",
"CREDIT_CARD",
"INVESTMENT",
"OTHER",
] as const;
/** Union of the literal codes listed in SUPPORTED_CONTRACT_TYPES. */
export type SupportedContractType = (typeof SUPPORTED_CONTRACT_TYPES)[number];
/**
 * Optional settings for an analysis call; every field may be omitted.
 * NOTE(review): the consumer of these options is not visible here, so the
 * exact effect of each field (e.g. the default for maxRetries) should be
 * confirmed against the caller.
 */
export type AnalyzeOptions = {
userId?: string;
fileName?: string;
maxRetries?: number;
};
/**
 * Contact details for a person or department.
 * Every field is optional and may also be explicitly null.
 */
export type ContactInfo = {
name?: string | null;
email?: string | null;
phone?: string | null;
address?: string | null;
role?: string | null;
};
/**
 * A named individual associated with a contract.
 * `name` is always present; the remaining fields are optional and nullable.
 */
export type KeyPerson = {
name: string;
role?: string | null;
email?: string | null;
phone?: string | null;
};
/**
 * Justification for one extracted field: which field, why it was extracted,
 * and the supporting snippet, plus optional hints about where it was found.
 */
export type ExplainabilityItem = {
field: string;
why: string;
sourceSnippet: string;
// Optional location/quality hints; each hint may be omitted or null.
sourceHints?: {
page?: string | null;
section?: string | null;
confidence?: number | null;
};
};
/**
 * Normalized result of a contract analysis.
 * Scalar fields that could not be determined are null; list fields are always
 * arrays (possibly empty).
 */
export type NormalizedAnalysis = {
title: string;
type: SupportedContractType;
provider: string | null;
policyNumber: string | null;
startDate: string | null;
endDate: string | null;
premium: number | null;
premiumCurrency?: string | null;
summary: string;
// Grouped highlights of the contract.
keyPoints: {
guarantees: string[];
exclusions: string[];
franchise: string | null;
importantDates: string[];
explainability?: ExplainabilityItem[];
};
extractedText: string;
// New enhanced fields
language?: string | null;
keyPeople: KeyPerson[];
contactInfo: ContactInfo;
importantContacts: ContactInfo[];
// Dates with business meaning; `type` is restricted to the five listed kinds.
relevantDates: Array<{
date: string;
description: string;
type: "EXPIRATION" | "RENEWAL" | "PAYMENT" | "REVIEW" | "OTHER";
}>;
};
/**
 * Outcome of checking whether a document is a contract.
 * `reason` is nullable.
 * NOTE(review): presumably reason is null for valid documents and confidence
 * is on a 0-100 scale, matching the prevalidation prompt — confirm against
 * the producer.
 */
export type ContractPrecheckResult = {
isValidContract: boolean;
confidence: number;
reason: string | null;
};