From d32648b204395fe3590ca2de5f38f0f97da510aa Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Tue, 18 Jun 2024 07:28:47 +0800 Subject: [PATCH] Refactor route path normalization (#31381) Refactor route path normalization and decouple it from the chi router. Fix the TODO, fix the legacy strange path behavior. --- modules/web/route.go | 59 ++++++++++++++++- modules/web/route_test.go | 44 +++++++++++++ routers/common/middleware.go | 68 ++++---------------- routers/common/middleware_test.go | 70 --------------------- routers/init.go | 3 +- services/context/base.go | 19 +++--- services/context/repo.go | 6 +- tests/integration/nonascii_branches_test.go | 41 ++++++------ 8 files changed, 153 insertions(+), 157 deletions(-) delete mode 100644 routers/common/middleware_test.go diff --git a/modules/web/route.go b/modules/web/route.go index 805fcb441..34290bd8e 100644 --- a/modules/web/route.go +++ b/modules/web/route.go @@ -5,8 +5,10 @@ package web import ( "net/http" + "net/url" "strings" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/web/middleware" "gitea.com/go-chi/binding" @@ -160,7 +162,7 @@ func (r *Route) Patch(pattern string, h ...any) { // ServeHTTP implements http.Handler func (r *Route) ServeHTTP(w http.ResponseWriter, req *http.Request) { - r.R.ServeHTTP(w, req) + r.normalizeRequestPath(w, req, r.R) } // NotFound defines a handler to respond whenever a route could not be found. @@ -168,6 +170,61 @@ func (r *Route) NotFound(h http.HandlerFunc) { r.R.NotFound(h) } +func (r *Route) normalizeRequestPath(resp http.ResponseWriter, req *http.Request, next http.Handler) { + normalized := false + normalizedPath := req.URL.EscapedPath() + if normalizedPath == "" { + normalizedPath, normalized = "/", true + } else if normalizedPath != "/" { + normalized = strings.HasSuffix(normalizedPath, "/") + normalizedPath = strings.TrimRight(normalizedPath, "/") + } + removeRepeatedSlashes := strings.Contains(normalizedPath, "//") + normalized = normalized || removeRepeatedSlashes + + // the following code block is a slow-path for replacing all repeated slashes "//" to one single "/" + // if the path doesn't have repeated slashes, then no need to execute it + if removeRepeatedSlashes { + buf := &strings.Builder{} + for i := 0; i < len(normalizedPath); i++ { + if i == 0 || normalizedPath[i-1] != '/' || normalizedPath[i] != '/' { + buf.WriteByte(normalizedPath[i]) + } + } + normalizedPath = buf.String() + } + + // If the config tells Gitea to use a sub-url path directly without reverse proxy, + // then we need to remove the sub-url path from the request URL path. + // But "/v2" is special for OCI container registry, it should always be in the root of the site. + if setting.UseSubURLPath { + remainingPath, ok := strings.CutPrefix(normalizedPath, setting.AppSubURL+"/") + if ok { + normalizedPath = "/" + remainingPath + } else if normalizedPath == setting.AppSubURL { + normalizedPath = "/" + } else if !strings.HasPrefix(normalizedPath+"/", "/v2/") { + // do not respond to other requests, to simulate a real sub-path environment + http.Error(resp, "404 page not found, sub-path is: "+setting.AppSubURL, http.StatusNotFound) + return + } + normalized = true + } + + // if the path is normalized, then fill it back to the request + if normalized { + decodedPath, err := url.PathUnescape(normalizedPath) + if err != nil { + http.Error(resp, "400 Bad Request: unable to unescape path "+normalizedPath, http.StatusBadRequest) + return + } + req.URL.RawPath = normalizedPath + req.URL.Path = decodedPath + } + + next.ServeHTTP(resp, req) +} + // Combo delegates requests to Combo func (r *Route) Combo(pattern string, h ...any) *Combo { return &Combo{r, pattern, h} diff --git a/modules/web/route_test.go b/modules/web/route_test.go index cc0e26a12..c5595a02a 100644 --- a/modules/web/route_test.go +++ b/modules/web/route_test.go @@ -10,6 +10,9 @@ import ( "strconv" "testing" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" + chi "github.com/go-chi/chi/v5" "github.com/stretchr/testify/assert" ) @@ -176,3 +179,44 @@ func TestRoute3(t *testing.T) { assert.EqualValues(t, http.StatusOK, recorder.Code) assert.EqualValues(t, 4, hit) } + +func TestRouteNormalizePath(t *testing.T) { + type paths struct { + EscapedPath, RawPath, Path string + } + testPath := func(reqPath string, expectedPaths paths) { + recorder := httptest.NewRecorder() + recorder.Body = bytes.NewBuffer(nil) + + actualPaths := paths{EscapedPath: "(none)", RawPath: "(none)", Path: "(none)"} + r := NewRoute() + r.Get("/*", func(resp http.ResponseWriter, req *http.Request) { + actualPaths.EscapedPath = req.URL.EscapedPath() + actualPaths.RawPath = req.URL.RawPath + actualPaths.Path = req.URL.Path + }) + + req, err := http.NewRequest("GET", reqPath, nil) + assert.NoError(t, err) + r.ServeHTTP(recorder, req) + assert.Equal(t, expectedPaths, actualPaths, "req path = %q", reqPath) + } + + // RawPath could be empty if the EscapedPath is the same as escape(Path) and it is already normalized + testPath("/", paths{EscapedPath: "/", RawPath: "", Path: "/"}) + testPath("//", paths{EscapedPath: "/", RawPath: "/", Path: "/"}) + testPath("/%2f", paths{EscapedPath: "/%2f", RawPath: "/%2f", Path: "//"}) + testPath("///a//b/", paths{EscapedPath: "/a/b", RawPath: "/a/b", Path: "/a/b"}) + + defer test.MockVariableValue(&setting.UseSubURLPath, true)() + defer test.MockVariableValue(&setting.AppSubURL, "/sub-path")() + testPath("/", paths{EscapedPath: "(none)", RawPath: "(none)", Path: "(none)"}) // 404 + testPath("/sub-path", paths{EscapedPath: "/", RawPath: "/", Path: "/"}) + testPath("/sub-path/", paths{EscapedPath: "/", RawPath: "/", Path: "/"}) + testPath("/sub-path//a/b///", paths{EscapedPath: "/a/b", RawPath: "/a/b", Path: "/a/b"}) + testPath("/sub-path/%2f/", paths{EscapedPath: "/%2f", RawPath: "/%2f", Path: "//"}) + // "/v2" is special for OCI container registry, it should always be in the root of the site + testPath("/v2", paths{EscapedPath: "/v2", RawPath: "/v2", Path: "/v2"}) + testPath("/v2/", paths{EscapedPath: "/v2", RawPath: "/v2", Path: "/v2"}) + testPath("/v2/%2f", paths{EscapedPath: "/v2/%2f", RawPath: "/v2/%2f", Path: "/v2//"}) +} diff --git a/routers/common/middleware.go b/routers/common/middleware.go index de4964839..51e42d87a 100644 --- a/routers/common/middleware.go +++ b/routers/common/middleware.go @@ -19,13 +19,23 @@ import ( "gitea.com/go-chi/session" "github.com/chi-middleware/proxy" - chi "github.com/go-chi/chi/v5" + "github.com/go-chi/chi/v5" ) // ProtocolMiddlewares returns HTTP protocol related middlewares, and it provides a global panic recovery func ProtocolMiddlewares() (handlers []any) { - // first, normalize the URL path - handlers = append(handlers, normalizeRequestPathMiddleware) + // make sure chi uses EscapedPath(RawPath) as RoutePath, then "%2f" could be handled correctly + handlers = append(handlers, func(next http.Handler) http.Handler { + return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { + ctx := chi.RouteContext(req.Context()) + if req.URL.RawPath == "" { + ctx.RoutePath = req.URL.EscapedPath() + } else { + ctx.RoutePath = req.URL.RawPath + } + next.ServeHTTP(resp, req) + }) + }) // prepare the ContextData and panic recovery handlers = append(handlers, func(next http.Handler) http.Handler { @@ -75,58 +85,6 @@ func ProtocolMiddlewares() (handlers []any) { return handlers } -func normalizeRequestPathMiddleware(next http.Handler) http.Handler { - return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { - // escape the URL RawPath to ensure that all routing is done using a correctly escaped URL - req.URL.RawPath = req.URL.EscapedPath() - - urlPath := req.URL.RawPath - rctx := chi.RouteContext(req.Context()) - if rctx != nil && rctx.RoutePath != "" { - urlPath = rctx.RoutePath - } - - normalizedPath := strings.TrimRight(urlPath, "/") - // the following code block is a slow-path for replacing all repeated slashes "//" to one single "/" - // if the path doesn't have repeated slashes, then no need to execute it - if strings.Contains(normalizedPath, "//") { - buf := &strings.Builder{} - prevWasSlash := false - for _, chr := range normalizedPath { - if chr != '/' || !prevWasSlash { - buf.WriteRune(chr) - } - prevWasSlash = chr == '/' - } - normalizedPath = buf.String() - } - - if setting.UseSubURLPath { - remainingPath, ok := strings.CutPrefix(normalizedPath, setting.AppSubURL+"/") - if ok { - normalizedPath = "/" + remainingPath - } else if normalizedPath == setting.AppSubURL { - normalizedPath = "/" - } else if !strings.HasPrefix(normalizedPath+"/", "/v2/") { - // do not respond to other requests, to simulate a real sub-path environment - http.Error(resp, "404 page not found, sub-path is: "+setting.AppSubURL, http.StatusNotFound) - return - } - // TODO: it's not quite clear about how req.URL and rctx.RoutePath work together. - // Fortunately, it is only used for debug purpose, we have enough time to figure it out in the future. - req.URL.RawPath = normalizedPath - req.URL.Path = normalizedPath - } - - if rctx == nil { - req.URL.Path = normalizedPath - } else { - rctx.RoutePath = normalizedPath - } - next.ServeHTTP(resp, req) - }) -} - func Sessioner() func(next http.Handler) http.Handler { return session.Sessioner(session.Options{ Provider: setting.SessionConfig.Provider, diff --git a/routers/common/middleware_test.go b/routers/common/middleware_test.go deleted file mode 100644 index c96071c3a..000000000 --- a/routers/common/middleware_test.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2022 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT -package common - -import ( - "net/http" - "net/http/httptest" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestStripSlashesMiddleware(t *testing.T) { - type test struct { - name string - expectedPath string - inputPath string - } - - tests := []test{ - { - name: "path with multiple slashes", - inputPath: "https://github.com///go-gitea//gitea.git", - expectedPath: "/go-gitea/gitea.git", - }, - { - name: "path with no slashes", - inputPath: "https://github.com/go-gitea/gitea.git", - expectedPath: "/go-gitea/gitea.git", - }, - { - name: "path with slashes in the middle", - inputPath: "https://git.data.coop//halfd/new-website.git", - expectedPath: "/halfd/new-website.git", - }, - { - name: "path with slashes in the middle", - inputPath: "https://git.data.coop//halfd/new-website.git", - expectedPath: "/halfd/new-website.git", - }, - { - name: "path with slashes in the end", - inputPath: "/user2//repo1/", - expectedPath: "/user2/repo1", - }, - { - name: "path with slashes and query params", - inputPath: "/repo//migrate?service_type=3", - expectedPath: "/repo/migrate", - }, - { - name: "path with encoded slash", - inputPath: "/user2/%2F%2Frepo1", - expectedPath: "/user2/%2F%2Frepo1", - }, - } - - for _, tt := range tests { - testMiddleware := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - assert.Equal(t, tt.expectedPath, r.URL.Path) - }) - - // pass the test middleware to validate the changes - handlerToTest := normalizeRequestPathMiddleware(testMiddleware) - // create a mock request to use - req := httptest.NewRequest("GET", tt.inputPath, nil) - // call the handler using a mock response recorder - handlerToTest.ServeHTTP(httptest.NewRecorder(), req) - } -} diff --git a/routers/init.go b/routers/init.go index 56c95cd1c..5cf40a415 100644 --- a/routers/init.go +++ b/routers/init.go @@ -191,7 +191,8 @@ func NormalRoutes() *web.Route { if setting.Packages.Enabled { // This implements package support for most package managers r.Mount("/api/packages", packages_router.CommonRoutes()) - // This implements the OCI API (Note this is not preceded by /api but is instead /v2) + // This implements the OCI API, this container registry "/v2" endpoint must be in the root of the site. + // If site admin deploys Gitea in a sub-path, they must configure their reverse proxy to map the "https://host/v2" endpoint to Gitea. r.Mount("/v2", packages_router.ContainerRoutes()) } diff --git a/services/context/base.go b/services/context/base.go index 23f0bcfc3..ccccee8c3 100644 --- a/services/context/base.go +++ b/services/context/base.go @@ -18,6 +18,7 @@ import ( "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/optional" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/translation" "code.gitea.io/gitea/modules/web/middleware" @@ -142,23 +143,27 @@ func (b *Base) RemoteAddr() string { return b.Req.RemoteAddr } -// Params returns the param on route -func (b *Base) Params(p string) string { - s, _ := url.PathUnescape(chi.URLParam(b.Req, strings.TrimPrefix(p, ":"))) +// Params returns the param in request path, eg: "/{var}" => "/a%2fb", then `var == "a/b"` +func (b *Base) Params(name string) string { + s, err := url.PathUnescape(b.PathParamRaw(name)) + if err != nil && !setting.IsProd { + panic("Failed to unescape path param: " + err.Error() + ", there seems to be a double-unescaping bug") + } return s } -func (b *Base) PathParamRaw(p string) string { - return chi.URLParam(b.Req, strings.TrimPrefix(p, ":")) +// PathParamRaw returns the raw param in request path, eg: "/{var}" => "/a%2fb", then `var == "a%2fb"` +func (b *Base) PathParamRaw(name string) string { + return chi.URLParam(b.Req, strings.TrimPrefix(name, ":")) } -// ParamsInt64 returns the param on route as int64 +// ParamsInt64 returns the param in request path as int64 func (b *Base) ParamsInt64(p string) int64 { v, _ := strconv.ParseInt(b.Params(p), 10, 64) return v } -// SetParams set params into routes +// SetParams set request path params into routes func (b *Base) SetParams(k, v string) { chiCtx := chi.RouteContext(b) chiCtx.URLParams.Add(strings.TrimPrefix(k, ":"), url.PathEscape(v)) diff --git a/services/context/repo.go b/services/context/repo.go index d9a3feaf2..47cbf4ffd 100644 --- a/services/context/repo.go +++ b/services/context/repo.go @@ -1006,12 +1006,12 @@ func RepoRefByType(refType RepoRefType, ignoreNotExistErr ...bool) func(*Context if refType == RepoRefLegacy { // redirect from old URL scheme to new URL scheme prefix := strings.TrimPrefix(setting.AppSubURL+strings.ToLower(strings.TrimSuffix(ctx.Req.URL.Path, ctx.Params("*"))), strings.ToLower(ctx.Repo.RepoLink)) - - ctx.Redirect(path.Join( + redirect := path.Join( ctx.Repo.RepoLink, util.PathEscapeSegments(prefix), ctx.Repo.BranchNameSubURL(), - util.PathEscapeSegments(ctx.Repo.TreePath))) + util.PathEscapeSegments(ctx.Repo.TreePath)) + ctx.Redirect(redirect) return cancel } } diff --git a/tests/integration/nonascii_branches_test.go b/tests/integration/nonascii_branches_test.go index 8917a9b57..a189273ea 100644 --- a/tests/integration/nonascii_branches_test.go +++ b/tests/integration/nonascii_branches_test.go @@ -4,6 +4,7 @@ package integration import ( + "fmt" "net/http" "net/url" "path" @@ -14,22 +15,6 @@ import ( "github.com/stretchr/testify/assert" ) -func testSrcRouteRedirect(t *testing.T, session *TestSession, user, repo, route, expectedLocation string, expectedStatus int) { - prefix := path.Join("/", user, repo, "src") - - // Make request - req := NewRequest(t, "GET", path.Join(prefix, route)) - resp := session.MakeRequest(t, req, http.StatusSeeOther) - - // Check Location header - location := resp.Header().Get("Location") - assert.Equal(t, path.Join(prefix, expectedLocation), location) - - // Perform redirect - req = NewRequest(t, "GET", location) - session.MakeRequest(t, req, expectedStatus) -} - func setDefaultBranch(t *testing.T, session *TestSession, user, repo, branch string) { location := path.Join("/", user, repo, "settings/branches") csrf := GetCSRF(t, session, location) @@ -41,7 +26,7 @@ func setDefaultBranch(t *testing.T, session *TestSession, user, repo, branch str session.MakeRequest(t, req, http.StatusSeeOther) } -func TestNonasciiBranches(t *testing.T) { +func TestNonAsciiBranches(t *testing.T) { testRedirects := []struct { from string to string @@ -98,6 +83,7 @@ func TestNonasciiBranches(t *testing.T) { to: "branch/%E3%83%96%E3%83%A9%E3%83%B3%E3%83%81", status: http.StatusOK, }, + // Tags { from: "Тэг", @@ -119,6 +105,7 @@ func TestNonasciiBranches(t *testing.T) { to: "tag/%E3%82%BF%E3%82%B0/%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB.md", status: http.StatusOK, }, + // Files { from: "README.md", @@ -135,6 +122,7 @@ func TestNonasciiBranches(t *testing.T) { to: "branch/Plus+Is+Not+Space/%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB.md", status: http.StatusNotFound, // it's not on default branch }, + // Same but url-encoded (few tests) { from: "%E3%83%96%E3%83%A9%E3%83%B3%E3%83%81", @@ -205,10 +193,23 @@ func TestNonasciiBranches(t *testing.T) { session := loginUser(t, user) setDefaultBranch(t, session, user, repo, "Plus+Is+Not+Space") + defer setDefaultBranch(t, session, user, repo, "master") for _, test := range testRedirects { - testSrcRouteRedirect(t, session, user, repo, test.from, test.to, test.status) - } + t.Run(test.from, func(t *testing.T) { + req := NewRequest(t, "GET", fmt.Sprintf("/%s/%s/src/%s", user, repo, test.from)) + resp := session.MakeRequest(t, req, http.StatusSeeOther) + if resp.Code != http.StatusSeeOther { + return + } - setDefaultBranch(t, session, user, repo, "master") + redirectLocation := resp.Header().Get("Location") + if !assert.Equal(t, fmt.Sprintf("/%s/%s/src/%s", user, repo, test.to), redirectLocation) { + return + } + + req = NewRequest(t, "GET", redirectLocation) + session.MakeRequest(t, req, test.status) + }) + } }