First commit

This commit is contained in:
YUKI VACHOT 2024-06-10 09:34:36 +02:00
commit 9125735173
4 changed files with 273 additions and 0 deletions

4
README.md Normal file
View file

@ -0,0 +1,4 @@
![Pages Workflow](https://github.com/NyxiumYuuki/PySpark_Schema-Comparison-Tool/actions/workflows/pages/pages-build-deployment/badge.svg)
# PySpark - Schema Comparison Tool (Current and Previous)
[Page PySpark_Schema-Comparison-Tool](https://nyxiumyuuki.github.io/PySpark_Schema-Comparison-Tool/)

42
index.html Normal file
View file

@ -0,0 +1,42 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Palantir Foundry - Schema Comparison Tool (Current and Previous)</title>
<link rel="stylesheet" href="styles.css">
</head>
<body>
<div class="container">
<h1>PySpark - Schema Comparison Tool (Current and Previous)</h1>
<div id="infoPanel">
<strong>Info:</strong>
<p>Nested fields may not always be attributed to the parent struct they come from, but the schema difference itself should still work.</p>
</div>
<div id="helpPanel">
<strong>Help:</strong>
<p>Ensure your text follows this format:</p>
<pre>
StructType([...]) | Previous: StructType([...])
</pre>
</div>
<textarea id="schemaText" placeholder="Paste the text with both previous and current schema here..."></textarea>
<button onclick="compareSchemas()">Compare</button>
<div id="result" style="display: none;">
<div class="result-section">
<h2>Added Fields</h2>
<table id="addedTable"></table>
</div>
<div class="result-section">
<h2>Removed Fields</h2>
<table id="removedTable"></table>
</div>
<div class="result-section">
<h2>Modified Fields</h2>
<table id="modifiedTable"></table>
</div>
</div>
</div>
<script src="script.js"></script>
</body>
</html>

136
script.js Normal file
View file

@ -0,0 +1,136 @@
/**
 * Click handler for the "Compare" button.
 *
 * Reads the pasted text from the `#schemaText` element, turns it into a
 * schema diff with `parseSchemas`, and hands that diff to `displayResult`.
 * Nothing is rendered when `parseSchemas` returns a falsy value.
 */
function compareSchemas() {
  const { value: pastedText } = document.getElementById("schemaText");
  const comparison = parseSchemas(pastedText);
  if (!comparison) {
    return;
  }
  displayResult(comparison);
}
/**
 * Extracts the current and previous schema from the pasted text and diffs them.
 *
 * The expected shape is `StructType([...]) | Previous: StructType([...])`.
 * Any amount of whitespace (including line breaks) is accepted around the `|`
 * separator and after `Previous:`, so text that was wrapped when copied is
 * still recognised.
 *
 * @param {string} text - Raw text containing both schema definitions.
 * @returns {{added: Array, removed: Array, modified: Array} | null} The diff,
 *   or `null` (after showing an alert) when either schema cannot be located.
 */
function parseSchemas(text) {
  // Both captures are greedy on purpose: the schema body itself contains many
  // `])` sequences, so the match must extend to the last one.
  const oldSchemaMatch = text.match(/Previous:\s*StructType\(\[(.*)\]\)/s);
  const newSchemaMatch = text.match(/StructType\(\[(.*)\]\)\s*\|\s*Previous:/s);
  if (!oldSchemaMatch || !newSchemaMatch) {
    alert("Invalid input. Please ensure the text contains both old and new schema definitions.");
    return null;
  }

  const oldSchema = parseFields(oldSchemaMatch[1]);
  const newSchema = parseFields(newSchemaMatch[1]);
  console.log('New Schema:', newSchema);
  console.log('Old Schema:', oldSchema);

  // "Added" = present in new but not old; "removed" = the reverse direction.
  const added = findDifferences(newSchema, oldSchema);
  const removed = findDifferences(oldSchema, newSchema);
  const modified = findModifiedFields(oldSchema, newSchema);
  return { added, removed, modified };
}
/**
 * Splits the argument list of a call, starting just after its opening `(`.
 *
 * Commas inside nested `(...)` / `[...]` or inside quoted strings do not split.
 *
 * @param {string} text - Text being scanned.
 * @param {number} from - Index of the first character after the opening `(`.
 * @returns {{args: string[], end: number} | null} Trimmed arguments and the
 *   index just past the closing `)`, or `null` if the call is unbalanced.
 */
function splitCallArguments(text, from) {
  const args = [];
  let depth = 0;
  let quote = null;
  let argStart = from;
  for (let i = from; i < text.length; i += 1) {
    const ch = text[i];
    if (quote !== null) {
      if (ch === '\\') {
        i += 1; // skip the escaped character
      } else if (ch === quote) {
        quote = null;
      }
    } else if (ch === "'" || ch === '"') {
      quote = ch;
    } else if (ch === '(' || ch === '[') {
      depth += 1;
    } else if (ch === ')' || ch === ']') {
      if (depth === 0) {
        if (ch !== ')') {
          return null;
        }
        args.push(text.slice(argStart, i).trim());
        return { args, end: i + 1 };
      }
      depth -= 1;
    } else if (ch === ',' && depth === 0) {
      args.push(text.slice(argStart, i).trim());
      argStart = i + 1;
    }
  }
  return null;
}

/**
 * Builds a field record from the three arguments of a `StructField(...)` call.
 *
 * @param {string[]} args - `[quotedName, typeExpression, 'True' | 'False']`.
 * @returns {{name: string, type: string, nullable: boolean, nested?: Array} | null}
 *   The field, or `null` when the arguments do not have the expected shape.
 */
function buildStructField(args) {
  if (args.length !== 3) {
    return null;
  }
  const [rawName, type, rawNullable] = args;
  const nameMatch = rawName.match(/^(['"])(.+)\1$/s);
  const isBoolean = rawNullable === 'True' || rawNullable === 'False';
  if (!nameMatch || type === '' || !isBoolean) {
    return null;
  }
  const name = nameMatch[2];
  const nullable = rawNullable === 'True';
  if (type.startsWith('StructType')) {
    const nestedFieldsMatch = type.match(/StructType\(\[(.*)\]\)/s);
    return {
      name,
      type: 'StructType',
      nullable,
      nested: nestedFieldsMatch ? parseFields(nestedFieldsMatch[1]) : [],
    };
  }
  return { name, type, nullable };
}

/**
 * Parses the body of a `StructType([...])` into a list of field records.
 *
 * Each `StructField('name', <type>, True|False)` at the current nesting level
 * becomes one record. Type expressions are read with balanced brackets, so
 * types that contain commas (e.g. `DecimalType(10,2)`, `ArrayType(X, True)`)
 * are kept intact, and a `StructType([...])` type is parsed recursively into
 * the record's `nested` array instead of being flattened into the parent level.
 *
 * @param {string} fieldsText - Text between `StructType([` and `])`.
 * @returns {Array<{name: string, type: string, nullable: boolean, nested?: Array}>}
 *   Fields in source order; struct fields have `type: 'StructType'` and `nested`.
 */
function parseFields(fieldsText) {
  const marker = 'StructField(';
  const fields = [];
  let cursor = 0;
  let start;
  while ((start = fieldsText.indexOf(marker, cursor)) !== -1) {
    const argsStart = start + marker.length;
    const call = splitCallArguments(fieldsText, argsStart);
    const field = call ? buildStructField(call.args) : null;
    if (field) {
      fields.push(field);
      // Resume after this field so its children are not re-read at this level.
      cursor = call.end;
    } else {
      // Malformed field: skip only the marker and keep looking for later ones.
      cursor = argsStart;
    }
  }
  return fields;
}
/**
 * Lists the fields of `schema1` that have no same-named field in `schema2`.
 *
 * When a field exists in both schemas and the `schema1` side has a `nested`
 * list, the comparison recurses into it; nested results are reported with a
 * dotted path (`parent.child`) as their name.
 *
 * @param {Array<Object>} schema1 - Fields to look for.
 * @param {Array<Object>} schema2 - Fields to look in.
 * @returns {Array<Object>} Fields only present in `schema1`.
 */
function findDifferences(schema1, schema2) {
  const onlyInFirst = [];
  for (const candidate of schema1) {
    const counterpart = schema2.find((other) => other.name === candidate.name);
    if (!counterpart) {
      onlyInFirst.push(candidate);
      continue;
    }
    if (!candidate.nested) {
      continue;
    }
    const childDiffs = findDifferences(candidate.nested, counterpart.nested || []);
    for (const child of childDiffs) {
      onlyInFirst.push({
        name: `${candidate.name}.${child.name}`,
        type: child.type,
        nullable: child.nullable,
        nested: child.nested,
      });
    }
  }
  return onlyInFirst;
}
/**
 * Lists fields that exist in both schemas under the same name but differ.
 *
 * - A non-struct field is reported (new version) when its serialized form
 *   differs from the old one.
 * - A struct field (one with a `nested` list) is reported itself when its own
 *   `type` or `nullable` changed, and its children are compared recursively;
 *   modified children are reported with a dotted path (`parent.child`).
 *
 * Fields that were only added or removed are not reported here.
 *
 * @param {Array<Object>} oldSchema - Previous schema fields.
 * @param {Array<Object>} newSchema - Current schema fields.
 * @returns {Array<Object>} Modified fields, taken from the new schema.
 */
function findModifiedFields(oldSchema, newSchema) {
  const modified = [];
  for (const field of newSchema) {
    const oldField = oldSchema.find((f) => f.name === field.name);
    if (!oldField || JSON.stringify(oldField) === JSON.stringify(field)) {
      continue;
    }
    if (!field.nested) {
      modified.push(field);
      continue;
    }
    // A struct can change on its own (e.g. nullable flipped, or it used to be
    // a scalar) even when none of its children changed; report that too.
    if (oldField.type !== field.type || oldField.nullable !== field.nullable) {
      modified.push(field);
    }
    const nestedModified = findModifiedFields(oldField.nested || [], field.nested);
    for (const nestedField of nestedModified) {
      modified.push({
        name: `${field.name}.${nestedField.name}`,
        type: nestedField.type,
        nullable: nestedField.nullable,
        nested: nestedField.nested,
      });
    }
  }
  return modified;
}
/**
 * Reveals the `#result` panel and fills its three tables.
 *
 * @param {{added: Array, removed: Array, modified: Array}} result - Diff whose
 *   lists are rendered into `#addedTable`, `#removedTable` and `#modifiedTable`.
 */
function displayResult(result) {
  const byId = (id) => document.getElementById(id);
  byId("result").style.display = "block";

  // Resolve every table first, then populate them in added/removed/modified order.
  const tables = {
    added: byId("addedTable"),
    removed: byId("removedTable"),
    modified: byId("modifiedTable"),
  };
  for (const [kind, table] of Object.entries(tables)) {
    populateTable(table, result[kind]);
  }
}
/**
 * Replaces a table's content with a header row plus one row per field.
 *
 * Columns are Name, Type, Nullable and Nested Fields; the last one shows the
 * JSON of `field.nested`, or the text `None` when the field has no `nested`.
 *
 * @param {HTMLTableElement} table - Table to overwrite.
 * @param {Array<Object>} data - Fields to render, one row each.
 */
function populateTable(table, data) {
  // Column readers, in display order.
  const columns = [
    (field) => field.name,
    (field) => field.type,
    (field) => field.nullable,
    (field) => (field.nested ? JSON.stringify(field.nested) : 'None'),
  ];

  table.innerHTML = `
<tr>
<th>Name</th>
<th>Type</th>
<th>Nullable</th>
<th>Nested Fields</th>
</tr>
`;

  for (const field of data) {
    const row = table.insertRow();
    columns.forEach((readValue, index) => {
      const cell = row.insertCell(index);
      cell.textContent = readValue(field);
    });
  }
}

91
styles.css Normal file
View file

@ -0,0 +1,91 @@
body {
font-family: Arial, sans-serif;
background-color: #f5f5f5;
margin: 0;
padding: 0;
display: flex;
justify-content: center;
align-items: flex-start;
height: 100vh;
}
.container {
background-color: #fff;
padding: 20px;
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
width: 80%;
max-width: 1000px;
margin-top: 20px;
}
h1, h2, h3 {
text-align: center;
margin: 0;
padding: 10px 0;
}
/* Schema input box. border-box sizing makes the 100% width include the
   padding and border, so the textarea no longer overflows its container. */
textarea {
  width: 100%;
  box-sizing: border-box;
  height: 150px;
  margin: 10px 0;
  padding: 10px;
  font-size: 14px;
  border: 1px solid #ccc;
  border-radius: 4px;
  resize: vertical;
}
button {
width: 100%;
padding: 10px;
font-size: 16px;
color: #fff;
background-color: #007bff;
border: none;
border-radius: 4px;
cursor: pointer;
margin-bottom: 20px;
}
button:hover {
background-color: #0056b3;
}
pre {
background-color: #f0f0f0;
padding: 10px;
border-radius: 4px;
overflow-x: auto;
}
#helpPanel {
margin-top: 20px;
padding: 10px;
border: 1px solid #ddd;
background: cornflowerblue;
}
#infoPanel {
margin-top: 20px;
padding: 10px;
border: 1px solid #ddd;
background: orange;
}
table {
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
}
th, td {
border: 1px solid black;
padding: 8px;
text-align: left;
}
th {
background-color: #f2f2f2;
}
.result-section {
margin-bottom: 30px;
}