mengw15 commented on code in PR #5258: URL: https://github.com/apache/texera/pull/5258#discussion_r3444142888
########## notebook-migration-service/src/main/scala/org/apache/texera/service/resource/NotebookMigrationResource.scala: ########## @@ -0,0 +1,391 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.texera.service.resource + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.typesafe.scalalogging.LazyLogging +import jakarta.ws.rs._ +import jakarta.ws.rs.core._ +import org.apache.texera.dao.SqlServer +import org.jooq.JSONB +import org.apache.texera.dao.jooq.generated.tables.Notebook +import org.apache.texera.dao.jooq.generated.tables.WorkflowNotebookMapping +import java.net.{HttpURLConnection, URL} +import java.nio.charset.StandardCharsets +import scala.util.control.NonFatal +import org.apache.texera.common.config.StorageConfig + +object NotebookMigrationResource extends LazyLogging { + + private val mapper: ObjectMapper = new ObjectMapper().registerModule(DefaultScalaModule) + + // Build an error response body via the mapper so the message is JSON-escaped; interpolating + // e.getMessage directly produces malformed JSON when it contains quotes, backslashes, or newlines. 
+ private def errorJson(message: String): String = + mapper.writeValueAsString(mapper.createObjectNode().put("error", message)) + + private val jupyterUrl = StorageConfig.jupyterURL + private val jupyterToken = StorageConfig.jupyterToken + // The token is passed as a URL param so the browser iframe can authenticate when loading the notebook. + // jupyterIframeURL is process-global state. This is safe ONLY because each user runs their own pod + // (own notebook-migration-service JVM + own Jupyter) in the k8s deployment, so this singleton is + // effectively per-user. Do NOT deploy this service as a shared multi-user instance without adding + // per-user keying here, or one user's upload would overwrite another's iframe URL. + private var jupyterIframeURL = s"$jupyterUrl/notebooks/work/notebook.ipynb?token=$jupyterToken" + + private def isJupyterAvailable(jupyterUrl: String): Boolean = { + var conn: java.net.HttpURLConnection = null + try { + conn = new java.net.URL(s"$jupyterUrl/api") + .openConnection() + .asInstanceOf[java.net.HttpURLConnection] + + conn.setRequestMethod("GET") + conn.setConnectTimeout(2000) + conn.setReadTimeout(2000) + + val status = conn.getResponseCode + + status == 200 || status == 403 + } catch { + case _: Exception => false + } finally { + if (conn != null) conn.disconnect() + } + } + + // Returns the Jupyter iframe reference URL + def getJupyterIframeURL(): Response = { + if (!isJupyterAvailable(jupyterUrl)) { + return Response + .status(500) + .entity( + """ + { + "success": false, + "message": "Cannot connect to Jupyter server" + } + """ + ) + .build() + } + + Response + .ok( + s""" + { + "success": true, + "url": "$jupyterIframeURL" + } + """ + ) + .build() + } + + // Returns the URL of Jupyter + def getJupyterURL(): Response = { + if (!isJupyterAvailable(jupyterUrl)) { + return Response + .status(500) + .entity( + """ + { + "success": false, + "message": "Cannot connect to Jupyter server" + } + """ + ) + .build() + } + + Response + 
.ok( + s""" + { + "success": true, + "url": "$jupyterUrl" + } + """ + ) + .build() + } + + // Set the notebook in Jupyter + def setNotebook(body: String): Response = { + if (!isJupyterAvailable(jupyterUrl)) { + return Response + .status(500) + .entity( + """ + { + "success": false, + "message": "Cannot connect to Jupyter server" + } + """ + ) + .build() + } + + var conn: HttpURLConnection = null + try { + val json = mapper.readTree(body) + + val notebookName = json.get("notebookName").asText() + val notebookData = json.get("notebookData") + + // Construct Jupyter API URL + val apiUrl = s"$jupyterUrl/api/contents/work/$notebookName" Review Comment: `notebookName` is taken from the request body and interpolated into two unsafe contexts, both without any validation: 1. **URL path (line 153):** `s"$jupyterUrl/api/contents/work/$notebookName"`. A `notebookName=../../etc/notebook.ipynb` after HTTP path normalization escapes `work/` and hits a different Jupyter contents endpoint. Jupyter's contents API has scope checks but defense in depth would catch this before the request even goes out. 2. **JSON response injection (line 193 → line 94):** `setNotebook` writes `notebookName` into the process-global `jupyterIframeURL`. Subsequent `getJupyterIframeURL()` calls then do raw JSON string concat at line 94: `"url": "$jupyterIframeURL"`. If `notebookName` ever contains a `"` character, the response is malformed JSON — frontend `JSON.parse` throws on iframe reload. The error path already uses `errorJson` for safe JSON building (fixed in `f0cf26172`); the success paths here still use raw `s"..."` interpolation. A simple regex check on `notebookName` at the top of `setNotebook` (e.g., reject anything outside `[A-Za-z0-9._-]+\.ipynb`) closes the URL-path angle. The success-path JSON could either use `errorJson`-style ObjectMapper construction, or just sanitize the URL before storing it in `jupyterIframeURL`. 
In the per-user-pod deployment model described elsewhere in this file, the real-world impact of the URL-path angle is bounded (a user can only attack their own pod's Jupyter, which they already control via the iframe). But the fix is cheap (a few lines), keeps the service safe if it is ever moved to a shared multi-user deployment, and removes a pattern that static-analysis tools and security audits will flag. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
