From 91257351737d2748de2fcdfe648d40d0169cd6de Mon Sep 17 00:00:00 2001 From: YUKI VACHOT Date: Mon, 10 Jun 2024 09:34:36 +0200 Subject: [PATCH] First commit --- README.md | 4 ++ index.html | 42 +++++++++++++++++ script.js | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++++ styles.css | 91 +++++++++++++++++++++++++++++++++++ 4 files changed, 273 insertions(+) create mode 100644 README.md create mode 100644 index.html create mode 100644 script.js create mode 100644 styles.css diff --git a/README.md b/README.md new file mode 100644 index 0000000..4ab0dd4 --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ + +![Pages Workflow](https://github.com/NyxiumYuuki/PySpark_Schema-Comparison-Tool/actions/workflows/pages/pages-build-deployment/badge.svg) +# PySpark - Schema Comparison Tool (Current and Previous) +[Page PySpark_Schema-Comparison-Tool](https://nyxiumyuuki.github.io/PySpark_Schema-Comparison-Tool/) \ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 0000000..402218c --- /dev/null +++ b/index.html @@ -0,0 +1,42 @@ + + + + + + Palantir Foundry - Schema Comparison Tool (Current and Previous) + + + +
+

PySpark - Schema Comparison Tool (Current and Previous)

+
+ Info: +

The tool does not yet report which nested struct a field comes from, but the schema difference itself should still work.

+
+
+ Help: +

Ensure your text follows this format:

+
+StructType([...]) | Previous: StructType([...])
+            
+
+ + + +
+ + + diff --git a/script.js b/script.js new file mode 100644 index 0000000..213e348 --- /dev/null +++ b/script.js @@ -0,0 +1,136 @@ +function compareSchemas() { + const text = document.getElementById("schemaText").value; + const result = parseSchemas(text); + if (result) { + displayResult(result); + } +} + +function parseSchemas(text) { + const oldSchemaMatch = text.match(/Previous: StructType\(\[(.*)\]\)/s); + const newSchemaMatch = text.match(/StructType\(\[(.*)\]\) \| Previous:/s); + + if (!oldSchemaMatch || !newSchemaMatch) { + alert("Invalid input. Please ensure the text contains both old and new schema definitions."); + return null; + } + + const oldSchema = parseFields(oldSchemaMatch[1]); + const newSchema = parseFields(newSchemaMatch[1]); + + console.log('New Schema:', newSchema); + console.log('Old Schema:', oldSchema); + + const added = findDifferences(newSchema, oldSchema); + const removed = findDifferences(oldSchema, newSchema); + const modified = findModifiedFields(oldSchema, newSchema); + + return { added, removed, modified }; +} + +function parseFields(fieldsText) { + const fields = []; + const regex = /StructField\('(\w+)',\s*([^,]+),\s*(True|False)\)/gs; + let match; + + while ((match = regex.exec(fieldsText)) !== null) { + const [_, name, type, nullable] = match; + + if (type.startsWith('StructType')) { + const nestedFieldsMatch = type.match(/StructType\(\[(.*)\]\)/s); + fields.push({ + name: name, + type: 'StructType', + nullable: nullable === 'True', + nested: nestedFieldsMatch ? 
parseFields(nestedFieldsMatch[1]) : [] + }); + } else { + fields.push({ + name: name, + type: type, + nullable: nullable === 'True' + }); + } + } + + return fields; +} + +function findDifferences(schema1, schema2) { + const differences = []; + + schema1.forEach(field => { + const matchingField = schema2.find(f => f.name === field.name); + if (!matchingField) { + differences.push(field); + } else if (field.nested) { + const nestedDifferences = findDifferences(field.nested, matchingField.nested || []); + nestedDifferences.forEach(nestedField => { + differences.push({ + name: `${field.name}.${nestedField.name}`, + type: nestedField.type, + nullable: nestedField.nullable, + nested: nestedField.nested + }); + }); + } + }); + + return differences; +} + +function findModifiedFields(oldSchema, newSchema) { + const modified = []; + + newSchema.forEach(field => { + const oldField = oldSchema.find(f => f.name === field.name); + if (oldField && JSON.stringify(oldField) !== JSON.stringify(field)) { + if (field.nested) { + const nestedModified = findModifiedFields(oldField.nested || [], field.nested); + nestedModified.forEach(nestedField => { + modified.push({ + name: `${field.name}.${nestedField.name}`, + type: nestedField.type, + nullable: nestedField.nullable, + nested: nestedField.nested + }); + }); + } else { + modified.push(field); + } + } + }); + + return modified; +} + +function displayResult(result) { + document.getElementById("result").style.display = "block"; + + const addedTable = document.getElementById("addedTable"); + const removedTable = document.getElementById("removedTable"); + const modifiedTable = document.getElementById("modifiedTable"); + + populateTable(addedTable, result.added); + populateTable(removedTable, result.removed); + populateTable(modifiedTable, result.modified); +} + +function populateTable(table, data) { + table.innerHTML = ` + + Name + Type + Nullable + Nested Fields + + `; + + data.forEach(field => { + const row = table.insertRow(); + 
row.insertCell(0).textContent = field.name; + row.insertCell(1).textContent = field.type; + row.insertCell(2).textContent = field.nullable; + row.insertCell(3).textContent = field.nested ? JSON.stringify(field.nested) : 'None'; + }); +} diff --git a/styles.css b/styles.css new file mode 100644 index 0000000..21e4d63 --- /dev/null +++ b/styles.css @@ -0,0 +1,91 @@ +body { + font-family: Arial, sans-serif; + background-color: #f5f5f5; + margin: 0; + padding: 0; + display: flex; + justify-content: center; + align-items: flex-start; + height: 100vh; +} + +.container { + background-color: #fff; + padding: 20px; + border-radius: 8px; + box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); + width: 80%; + max-width: 1000px; + margin-top: 20px; +} + +h1, h2, h3 { + text-align: center; + margin: 0; + padding: 10px 0; +} + +textarea { + width: 100%; + height: 150px; + margin: 10px 0; + padding: 10px; + font-size: 14px; + border: 1px solid #ccc; + border-radius: 4px; + resize: vertical; +} + +button { + width: 100%; + padding: 10px; + font-size: 16px; + color: #fff; + background-color: #007bff; + border: none; + border-radius: 4px; + cursor: pointer; + margin-bottom: 20px; +} + +button:hover { + background-color: #0056b3; +} + +pre { + background-color: #f0f0f0; + padding: 10px; + border-radius: 4px; + overflow-x: auto; +} + +#helpPanel { + margin-top: 20px; + padding: 10px; + border: 1px solid #ddd; + background: cornflowerblue; +} + +#infoPanel { + margin-top: 20px; + padding: 10px; + border: 1px solid #ddd; + background: orange; +} + +table { + width: 100%; + border-collapse: collapse; + margin-bottom: 20px; +} +th, td { + border: 1px solid black; + padding: 8px; + text-align: left; +} +th { + background-color: #f2f2f2; +} +.result-section { + margin-bottom: 30px; +} \ No newline at end of file