From 4e8e71e4e2dd567f46180d4e74928bc29387268d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= Date: Mon, 21 Mar 2022 13:23:09 +0100 Subject: [PATCH] Initial checkin of Shaman of Flamenco 2 This is not yet working, it's just a direct copy of the Manager of Flamenco 2, with Logrus replaced by Zerolog. The API has been documented in flamenco-manager.yaml as a starting point for the integration. --- pkg/api/flamenco-manager.yaml | 198 ++++++++++++++ pkg/shaman/README.md | 107 ++++++++ pkg/shaman/TODO.md | 11 + .../_test_file_store/checkout_definition.txt | 4 + .../6001.blob | Bin 0 -> 6001 bytes .../3367.blob | 106 ++++++++ .../7488.blob | 230 ++++++++++++++++ .../486.blob | 18 ++ .../6664.blob | Bin 0 -> 6664 bytes .../7217.blob | 236 ++++++++++++++++ .../781.blob | Bin 0 -> 781 bytes .../7459.blob | 229 ++++++++++++++++ pkg/shaman/checkout/checkout_id.go | 31 +++ pkg/shaman/checkout/definition.go | 168 ++++++++++++ pkg/shaman/checkout/definition_test.go | 86 ++++++ .../checkout/definition_test_example.txt | 5 + pkg/shaman/checkout/logging.go | 29 ++ pkg/shaman/checkout/manager.go | 237 ++++++++++++++++ pkg/shaman/checkout/manager_test.go | 76 ++++++ pkg/shaman/checkout/routes.go | 191 +++++++++++++ pkg/shaman/checkout/routes_test.go | 125 +++++++++ pkg/shaman/cleanup.go | 253 ++++++++++++++++++ pkg/shaman/cleanup_test.go | 225 ++++++++++++++++ pkg/shaman/config/config.go | 55 ++++ pkg/shaman/config/testing.go | 58 ++++ pkg/shaman/fileserver/checkfile.go | 46 ++++ pkg/shaman/fileserver/fileserver.go | 73 +++++ pkg/shaman/fileserver/logging.go | 29 ++ pkg/shaman/fileserver/receivefile.go | 176 ++++++++++++ pkg/shaman/fileserver/receivefile_test.go | 87 ++++++ pkg/shaman/fileserver/receivelistener.go | 88 ++++++ pkg/shaman/fileserver/routes.go | 97 +++++++ pkg/shaman/fileserver/servefile.go | 83 ++++++ pkg/shaman/fileserver/servefile_test.go | 71 +++++ pkg/shaman/filestore/filestore.go | 196 ++++++++++++++ pkg/shaman/filestore/filestore_test.go | 155 +++++++++++ pkg/shaman/filestore/interface.go | 81 ++++++ pkg/shaman/filestore/logging.go | 29 ++ pkg/shaman/filestore/substore.go | 107 ++++++++ pkg/shaman/filestore/substore_test.go | 87 ++++++ pkg/shaman/filestore/tempfile.go | 89 ++++++ pkg/shaman/filestore/testing.go | 116 ++++++++ pkg/shaman/hasher/checksum.go | 36 +++ pkg/shaman/hasher/checksum_test.go | 44 +++ pkg/shaman/hasher/copier.go | 81 ++++++ pkg/shaman/httpserver/filefinder.go | 68 +++++ pkg/shaman/httpserver/gzip.go | 87 ++++++ pkg/shaman/httpserver/logging.go | 29 ++ pkg/shaman/httpserver/testroutes.go | 72 +++++ pkg/shaman/humanize.go | 47 ++++ pkg/shaman/jwtauth/dummy.go | 11 + pkg/shaman/logging.go | 29 ++ pkg/shaman/server.go | 107 ++++++++ pkg/shaman/touch/touch.go | 43 +++ pkg/shaman/touch/touch_linux.go | 43 +++ pkg/shaman/touch/touch_nonlinux.go | 36 +++ pkg/shaman/touch/touch_test.go | 54 ++++ 57 files changed, 5075 insertions(+) create mode 100644 pkg/shaman/README.md create mode 100644 pkg/shaman/TODO.md create mode 100644 pkg/shaman/_test_file_store/checkout_definition.txt create mode 100644 pkg/shaman/_test_file_store/stored/30/928ffced04c7008f3324fded86d133effea50828f5ad896196f2a2e190ac7e/6001.blob create mode 100644 pkg/shaman/_test_file_store/stored/59/0c148428d5c35fab3ebad2f3365bb469ab9c531b60831f3e826c472027a0b9/3367.blob create mode 100644 pkg/shaman/_test_file_store/stored/80/b749c27b2fef7255e7e7b3c2029b03b31299c75ff1f1c72732081c70a713a3/7488.blob create mode 100644 
pkg/shaman/_test_file_store/stored/91/4853599dd2c351ab7b82b219aae6e527e51518a667f0ff32244b0c94c75688/486.blob create mode 100644 pkg/shaman/_test_file_store/stored/ba/c52223acab283d5fc5160560e617d4c0161690069b3e8b66fba546c47f5388/6664.blob create mode 100644 pkg/shaman/_test_file_store/stored/d6/fc7289b5196cc96748ea72f882a22c39b8833b457fe854ef4c03a01f5db0d3/7217.blob create mode 100644 pkg/shaman/_test_file_store/stored/dc/89f15de821ad1df3e78f8ef455e653a2d1862f2eb3f5ee78aa4ca68eb6fb35/781.blob create mode 100644 pkg/shaman/_test_file_store/stored/e7/fd2d9b2a7054baea5d776def36ba908b9857d49cee3e4746ad671abb02d23f/7459.blob create mode 100644 pkg/shaman/checkout/checkout_id.go create mode 100644 pkg/shaman/checkout/definition.go create mode 100644 pkg/shaman/checkout/definition_test.go create mode 100644 pkg/shaman/checkout/definition_test_example.txt create mode 100644 pkg/shaman/checkout/logging.go create mode 100644 pkg/shaman/checkout/manager.go create mode 100644 pkg/shaman/checkout/manager_test.go create mode 100644 pkg/shaman/checkout/routes.go create mode 100644 pkg/shaman/checkout/routes_test.go create mode 100644 pkg/shaman/cleanup.go create mode 100644 pkg/shaman/cleanup_test.go create mode 100644 pkg/shaman/config/config.go create mode 100644 pkg/shaman/config/testing.go create mode 100644 pkg/shaman/fileserver/checkfile.go create mode 100644 pkg/shaman/fileserver/fileserver.go create mode 100644 pkg/shaman/fileserver/logging.go create mode 100644 pkg/shaman/fileserver/receivefile.go create mode 100644 pkg/shaman/fileserver/receivefile_test.go create mode 100644 pkg/shaman/fileserver/receivelistener.go create mode 100644 pkg/shaman/fileserver/routes.go create mode 100644 pkg/shaman/fileserver/servefile.go create mode 100644 pkg/shaman/fileserver/servefile_test.go create mode 100644 pkg/shaman/filestore/filestore.go create mode 100644 pkg/shaman/filestore/filestore_test.go create mode 100644 pkg/shaman/filestore/interface.go create mode 100644 pkg/shaman/filestore/logging.go create mode 100644 pkg/shaman/filestore/substore.go create mode 100644 pkg/shaman/filestore/substore_test.go create mode 100644 pkg/shaman/filestore/tempfile.go create mode 100644 pkg/shaman/filestore/testing.go create mode 100644 pkg/shaman/hasher/checksum.go create mode 100644 pkg/shaman/hasher/checksum_test.go create mode 100644 pkg/shaman/hasher/copier.go create mode 100644 pkg/shaman/httpserver/filefinder.go create mode 100644 pkg/shaman/httpserver/gzip.go create mode 100644 pkg/shaman/httpserver/logging.go create mode 100644 pkg/shaman/httpserver/testroutes.go create mode 100644 pkg/shaman/humanize.go create mode 100644 pkg/shaman/jwtauth/dummy.go create mode 100644 pkg/shaman/logging.go create mode 100644 pkg/shaman/server.go create mode 100644 pkg/shaman/touch/touch.go create mode 100644 pkg/shaman/touch/touch_linux.go create mode 100644 pkg/shaman/touch/touch_nonlinux.go create mode 100644 pkg/shaman/touch/touch_test.go diff --git a/pkg/api/flamenco-manager.yaml b/pkg/api/flamenco-manager.yaml index 4837fefa..ff4c4dd1 100644 --- a/pkg/api/flamenco-manager.yaml +++ b/pkg/api/flamenco-manager.yaml @@ -260,6 +260,159 @@ paths: application/json: schema: {$ref: "#/components/schemas/Job"} + /shaman/checkout/requirements: + summary: Allows a client to check which files are available on the server, and which ones are still unknown. + post: + operationId: shamanCheckoutRequirements + summary: Checks a Shaman Requirements file, and reports which files are unknown. 
+ tags: [shaman] + requestBody: + description: Set of files to check + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/ShamanRequirements" + responses: + "200": + description: Subset of the posted requirements, indicating the unknown files. + content: + application/json: + schema: {$ref: "#/components/schemas/ShamanRequirements"} + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /shaman/checkout/create/{checkoutID}: + summary: Symlink a set of files into the checkout area. + post: + operationId: shamanCheckout + summary: Create a directory, and symlink the required files into it. The files must all have been uploaded to Shaman before calling this endpoint. + tags: [shaman] + parameters: + - name: checkoutID + in: path + required: true + schema: {type: string} + requestBody: + description: Set of files to check out. + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/ShamanCheckout" + responses: + "204": + description: Checkout was created succesfully. + "409": + description: Checkout already exists. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /shaman/files/{checksum}/{filesize}: + summary: Upload files to the Shaman server. + options: + operationId: shamanFileStoreCheck + summary: > + Check the status of a file on the Shaman server. + tags: [shaman] + parameters: + - name: checksum + in: path + required: true + schema: {type: string} + description: SHA256 checksum of the file. + - name: filesize + in: path + required: true + schema: {type: integer} + description: Size of the file in bytes. + responses: + "200": + description: The file is known to the server. + "420": + description: The file is currently being uploaded to the server. + "404": + description: The file does not exist on the server. + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + post: + operationId: shamanFileStore + summary: > + Store a new file on the Shaman server. Note that the Shaman server can + forcibly close the HTTP connection when another client finishes uploading + the exact same file, to prevent double uploads. + tags: [shaman] + parameters: + - name: checksum + in: path + required: true + schema: {type: string} + description: SHA256 checksum of the file. + - name: filesize + in: path + required: true + schema: {type: integer} + description: Size of the file in bytes. + - name: X-Shaman-Can-Defer-Upload + in: header + required: false + schema: {type: boolean} + description: > + The client indicates that it can defer uploading this file. The + "208" response will not only be returned when the file is already + fully known to the Shaman server, but also when someone else is + currently uploading this file. + - name: X-Shaman-Original-Filename + in: header + required: false + schema: {type: string} + description: > + The original filename. If sent along with the request, it will be + included in the server logs, which can aid in debugging. + requestBody: + description: The file's contents. + required: true + content: + application/octet-stream: + example: Just the contents of any file. + responses: + "204": + description: Checkout was created succesfully. + "208": + description: > + The file has already been uploaded. 
Note that this can also be sent + when this file is currently in the process of being uploaded, and + `X-Shaman-Can-Defer-Upload: true` was sent in the request. + "409": + description: Checkout already exists. + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + tags: - name: meta description: Info about the Flamenco Manager itself. @@ -267,6 +420,8 @@ tags: description: Job & task queries, submission, and management. - name: worker description: API for Flamenco Workers to communicate with Flamenco Manager. + - name: shaman + description: Shaman API, for file uploading & creating job checkouts. components: schemas: @@ -524,6 +679,49 @@ components: properties: message: {type: string} + ShamanRequirements: + type: object + description: Set of files with their SHA256 checksum and size in bytes. + properties: + "req": + type: array + items: + type: object + properties: + "c": {type: string, description: "SHA256 checksum of the file"} + "s": {type: integer, description: "File size in bytes"} + required: [c, s] + required: [req] + example: + req: + - c: 35b0491c27b0333d1fb45fc0789a12ca06b1d640d2569780b807de504d7029e0 + s: 1424 + - c: 63b72c63b9424fd13b9370fb60069080c3a15717cf3ad442635b187c6a895079 + s: 127 + + ShamanCheckout: + type: object + description: Set of files with their SHA256 checksum, size in bytes, and desired location in the checkout directory. + properties: + "req": + type: array + items: + type: object + properties: + "c": {type: string, description: "SHA256 checksum of the file"} + "s": {type: integer, description: "File size in bytes"} + "p": {type: string, description: "File checkout path"} + required: [c, s, p] + required: [req] + example: + req: + - c: 35b0491c27b0333d1fb45fc0789a12ca06b1d640d2569780b807de504d7029e0 + s: 1424 + p: definition.go + - c: 63b72c63b9424fd13b9370fb60069080c3a15717cf3ad442635b187c6a895079 + s: 127 + p: logging.go + securitySchemes: worker_auth: description: Username is the worker ID, password is the secret given at worker registration. diff --git a/pkg/shaman/README.md b/pkg/shaman/README.md new file mode 100644 index 00000000..269c129e --- /dev/null +++ b/pkg/shaman/README.md @@ -0,0 +1,107 @@ +# Shaman + +Shaman is a file storage server. It accepts uploaded files via HTTP, and stores them based on their +SHA256-sum and their file length. It can recreate directory structures by symlinking those files. +Shaman is intended to complement [Blender Asset +Tracer (BAT)](https://developer.blender.org/source/blender-asset-tracer/) and +[Flamenco](https://flamenco.io/), but can be used as a standalone component. + +The overall use looks like this: + +- User creates a set of files (generally via BAT-packing). +- User creates a Checkout Definition File (CDF), consisting of the SHA256-sums, file sizes, and file + paths. +- User sends the CDF to Shaman for inspection. +- Shaman replies which files still need uploading. +- User sends those files. +- User sends the CDF to Shaman and requests a checkout with a certain ID. +- Shaman creates the checkout by symlinking the files listed in the CDF. +- Shaman responds with the directory the checkout was created in. + +After this process, the checkout directory contains symlinks to all the files in the Checkout +Definition File. 
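+
+The sketch below illustrates this flow from a Go client. It is a sketch only:
+the Manager address, the checkout ID, and the error handling are placeholders,
+and the checksum/size/path values are copied from the `ShamanCheckout` example
+in `flamenco-manager.yaml`. The endpoint paths, headers, and response codes are
+the ones documented in that file.
+
+    package main
+
+    import (
+        "bytes"
+        "encoding/json"
+        "fmt"
+        "log"
+        "net/http"
+        "os"
+    )
+
+    // Entries follow the ShamanRequirements/ShamanCheckout schemas:
+    // "c" = SHA256 checksum, "s" = file size in bytes, "p" = checkout path.
+    type fileSpec struct {
+        Checksum string `json:"c"`
+        Size     int64  `json:"s"`
+        Path     string `json:"p,omitempty"`
+    }
+
+    type fileSet struct {
+        Req []fileSpec `json:"req"`
+    }
+
+    func main() {
+        base := "http://localhost:8080" // placeholder Manager address
+        files := fileSet{Req: []fileSpec{{
+            Checksum: "35b0491c27b0333d1fb45fc0789a12ca06b1d640d2569780b807de504d7029e0",
+            Size:     1424,
+            Path:     "definition.go",
+        }}}
+
+        // 1. Ask which of these files are still unknown to the Shaman.
+        // The requirements check only takes the "c" and "s" fields.
+        var check fileSet
+        for _, f := range files.Req {
+            check.Req = append(check.Req, fileSpec{Checksum: f.Checksum, Size: f.Size})
+        }
+        payload, _ := json.Marshal(check)
+        resp, err := http.Post(base+"/shaman/checkout/requirements", "application/json", bytes.NewReader(payload))
+        if err != nil {
+            log.Fatal(err)
+        }
+        var unknown fileSet
+        json.NewDecoder(resp.Body).Decode(&unknown)
+        resp.Body.Close()
+
+        // 2. Upload only the files reported as unknown.
+        // A 204 response means "stored", a 208 means "already known".
+        unknownSums := map[string]bool{}
+        for _, f := range unknown.Req {
+            unknownSums[f.Checksum] = true
+        }
+        for _, f := range files.Req {
+            if !unknownSums[f.Checksum] {
+                continue
+            }
+            blob, err := os.Open(f.Path) // assumes the local path equals the checkout path
+            if err != nil {
+                log.Fatal(err)
+            }
+            url := fmt.Sprintf("%s/shaman/files/%s/%d", base, f.Checksum, f.Size)
+            req, _ := http.NewRequest("POST", url, blob)
+            req.Header.Set("Content-Type", "application/octet-stream")
+            req.Header.Set("X-Shaman-Original-Filename", f.Path)
+            upResp, err := http.DefaultClient.Do(req)
+            if err != nil {
+                log.Fatal(err)
+            }
+            upResp.Body.Close()
+            blob.Close()
+        }
+
+        // 3. Request the checkout; a 204 response means the symlinked
+        // directory was created.
+        payload, _ = json.Marshal(files)
+        resp, err = http.Post(base+"/shaman/checkout/create/example-checkout", "application/json", bytes.NewReader(payload))
+        if err != nil {
+            log.Fatal(err)
+        }
+        log.Println("checkout response:", resp.Status)
+        resp.Body.Close()
+    }
+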
**The user only had to upload new and changed files.** + + +## File Store Structure + +The Shaman file store is structured as follows: + + shaman-store/ + .. uploading/ + .. /{checksum[0:2]}/{checksum[2:]}/{filesize}-{unique-suffix}.tmp + .. stored/ + .. /{checksum[0:2]}/{checksum[2:]}/{filesize}.blob + +When a file is uploaded, it goes through several stages: + +- Uploading: the file is being streamed over HTTP and in the process of + being stored to disk. The `{checksum}` and `{filesize}` fields are + as given by the user. While the file is being streamed to disk the + SHA256 hash is calculated. After upload is complete the user-provided + checksum and file size are compared to the SHA256 hash and actual size. + If these differ, the file is rejected. +- Stored: after uploading is complete, the file is stored in the `stored` + directory. Here the `{checksum}` and `{filesize}` fields can be assumed + to be correct. + +## Garbage Collection + +To prevent infinite growth of the File Store, the Shaman will periodically +perform a garbage collection sweep. Garbage Collection can be configured by +setting the following settings in `shaman.yaml`: + +- `garbageCollect.period`: this is the sleep time between garbage collector + sweeps. Default is `8h`. Set to `0` to disable garbage collection. +- `garbageCollect.maxAge`: files that are newer than this age are not + considered for garbage collection. Default is `744h` or 31 days. +- `garbageCollect.extraCheckoutPaths`: list of directories to include when + searching for symlinks. Shaman will never create a checkout here. + Default is empty. + +Every time a file is symlinked into a checkout directory, it is 'touched' +(that is, its modification time is set to 'now'). + +Files that are not referenced in any checkout, and that have a modification +time that is older than `garbageCollectMaxAge` will be deleted. + +To perform a dry run of the garbage collector, use `shaman -gc`. + + +## Key file generation + +SHAman uses JWT with `ES256` signatures. The public keys of the JWT-signing +authority need to be known, and stored in `jwtkeys/*-public*.pem`. +For more info, see `jwtkeys/README.md` + + +## Source code structure + +- `Makefile`: Used for building Shaman, testing, etc. +- `main.go`: The main entry point of the Shaman server. Handles CLI arguments, + setting up logging, starting & stopping the server. +- `auth`: JWT token handling, authentication wrappers for HTTP handlers. +- `checkout`: Creates (and deletes) checkouts of files by creating directories + and symlinking to the file storage. +- `config`: Configuration file handling. +- `fileserver`: Stores uploaded files in the file store, and serves files from + it. +- `filestore`: Stores files by SHA256-sum and file size. Has separate storage + bins for currently-uploading files and fully-stored files. +- `hasher`: Computes SHA256 sums. +- `httpserver`: The HTTP server itself (other packages just contain request + handlers, and not the actual server). +- `libshaman`: Combines the other modules into one Shaman server struct. + This allows `main.go` to start the Shaman server, and makes it possible in + the future to embed a Shaman server into another Go project. +`_py_client`: An example client in Python. Just hacked together as a proof of + concept and by no means of any official status. + + +## Non-source directories + +- `jwtkeys`: Public keys + a private key for JWT sigining. 
For now Shaman can + create its own dummy JWT keys, but in the future this will become optional + or be removed altogether. +- `static`: For serving static files for the web interface. +- `views`: Contains HTML files for the web interface. This probably will be + merged with `static` at some point. diff --git a/pkg/shaman/TODO.md b/pkg/shaman/TODO.md new file mode 100644 index 00000000..29c6758d --- /dev/null +++ b/pkg/shaman/TODO.md @@ -0,0 +1,11 @@ +# Ideas for the future + +In no particular order: + +- Remove testing endpoints (including the dummy JWT token generation). +- Monitor free harddisk space for checkout and file storage directories. +- Graceful shutdown: + * Close HTTP server while keeping current requests running. + * Complete currently-running checkouts. + * Maybe complete currently running file uploads? +- Automatic cleanup of unfinished uploads. diff --git a/pkg/shaman/_test_file_store/checkout_definition.txt b/pkg/shaman/_test_file_store/checkout_definition.txt new file mode 100644 index 00000000..709f2de8 --- /dev/null +++ b/pkg/shaman/_test_file_store/checkout_definition.txt @@ -0,0 +1,4 @@ +590c148428d5c35fab3ebad2f3365bb469ab9c531b60831f3e826c472027a0b9 3367 subdir/replacer.py +80b749c27b2fef7255e7e7b3c2029b03b31299c75ff1f1c72732081c70a713a3 7488 feed.py +914853599dd2c351ab7b82b219aae6e527e51518a667f0ff32244b0c94c75688 486 httpstuff.py +d6fc7289b5196cc96748ea72f882a22c39b8833b457fe854ef4c03a01f5db0d3 7217 filesystemstuff.py diff --git a/pkg/shaman/_test_file_store/stored/30/928ffced04c7008f3324fded86d133effea50828f5ad896196f2a2e190ac7e/6001.blob b/pkg/shaman/_test_file_store/stored/30/928ffced04c7008f3324fded86d133effea50828f5ad896196f2a2e190ac7e/6001.blob new file mode 100644 index 0000000000000000000000000000000000000000..2e1692d4974d6d63f578d74cf837cdabc6274fef GIT binary patch literal 6001 zcmZu#TXP%7mF}J!1|UdN5=BuL%djF@##*KlZQ{*p6|ZGlR-9EJTa+bvHpys+Zh%7q z7^r6;UNUfPnM%b~sYHHG`2l?Niyu;{{Dk~~ec7sh+RZ-cFZjjzP7eT4bby+kzMa$4 zr@wRScV}lS48OmvfBK6boMG($sWJI^sJw$G-QbJ~CU}Ph#hVA*w5`B0Z#%Hf+X)== zb_3VEy}&c?QcyB)Kk&`F9F+04I+gBBFvE$b-KlnGgIUhx>>eA}nD-dL2gCz&XJX^D!MdEx{o(7Pn6XwQl{v`?UH+p?Wn89}S|;c;r|!>GlU%+-ryUu;wOxh(0l?hURm_^{6L0nDP}ZfY(|**Hm)ptZR5dTR1ryOnd*&-sY&v!Rt+8`cQY+vn=FN7dXKIr~iA+CGJvyU*TZznmM|nCs>C zKIiPwkMA>@wNV0%1)6oL3;QG+`Jh{;v3m^js`sG=I1Q(v`SPx}Yt~UZLY^1@%(I5m8Mi$aZiLsu&2;YmryWX;U4#$Dz_}3&*5*`r%+cg9SoxN zX*%&#bLg1LmJA7*nH4-~ABBKaGdAMF%GtifS!QLnu=lK|w@`OTj*v4OdBWM}PhT6_ z+h=ooRLUXqp_4o5yB{)0+s`oy$z8e6NOm8wJM01VmqG0w=y4%cw-K}dVr+MI^{3Z? z(Js(fyO63fzC~MVtzOY~Cux(utqt0`lBuY<8D`K50~%d|imi9zHSIy!uKegN?IaK{ zkR&6ay+L0@nbhTVxh>+hOtZSDUM9}vwN3(^h<9b+5eJEbiF+g??V2I&PgLcw^ydr3 zl2f(%J{XSAz_F^l%6&W*&ci{u#E)`y2{T?0IlOjBLuC z!$(jVn=cH#gKtX-(hOqv?FERt!WS&{Z|H&j^ymw)GY^)kJpn)IEdA(Bw#del)=>PG z{g%V00Q^t<44yCJpTo?2&i3s0*dEz`r!hUo3GqJ@@i`PZ`{k)22n5Ud3HG@ue`B3w z#)g0?$Sp-StzotUowSy}uX&8#pP*M=qrzBWQu1%g*V?itxBKetpQD-5at_-)+gu;? 
zHj7OyUjv4tPWtxpbgJPG&aR4jZJZ)@4fck!S%cWWu%59eGY6R7nk>e+B7{!&OF|;0 zG_WO(fT98J2z%S-=>mByKp55nmSG-n<|Bu!#LzmR@s@Bm$+LNli6MPoodvUJ<_xHK z<#&GFRU}pIc9Jcr)G^f6c`7cTsM|(^186YJXrZy)s2w*CgX9D(FR7}MX(~lHo&F)1 zq$afD^PChGXt{~L=}vt#raS!$8X3#D;IM%Y$sXo_0&FCd6mKhYGB*ccgiU8HoIUFg zP|nOlAG`|noo2axn!!4R`w6k2P)bP7wJaKA>vjKcu?cwXqq?f@e7#+Pdqj?w+xG@w^Oer1qBTlsyIt!r#1e#yVw!%-vL`J zfMf0_y)?!-*IJ-cvYR|6XF?nQ&N9np4Mi|`Mn&Vxrqe3P5RW9jlO| z^6f>wz4*v`8{gFXtuor>vCwIp6S?prxV;PJcqHoR>E()EK!+ z&hpYKz6;i9b~n1Kb_kQ^7!731Q5KMl(%R2fZ#TxUyIO>&SD()kQ!Theh`o5R7Po4> zBm+7D2T@>6bJfrY6(38LLe-v;C(y?SaJ>`!PtV94FGYC>>$?={OoM-2Hk}l=7*pT= z^}^RT|8Munzt?Ash1AYQnjre=B#3@Ww^wf{l_*`g7j*`*sN3-TdST_`J9ln0?uPGw zaO*~R@5Y_g4{kTK4ezggz|O3xOQ_L8+Rc(dbG=?U1O+|z-1J3=fTJB+G^m;n9B&e+ z9j6{4dGRr7jZlh3f|)`@W}!n6R^*C4#bdlH^DIPFh^8zL@I=yp()=v|7cSrDW<(k%2 z%!2l&pJg+jan!3Bqke?O?r3(OZOkpfUm{#CIf&~vx%xVL3FV(3UFEE2JtFFPqx*3h<@8UFr+|IPyXj~Yr~1eCC8kNz8$?AT`I3$7N) zigES{`X1nUA@6a%;OUjz#;kX;h0$>!3;5c%>2d|O$88udWYk%G3=!|Xu|gVBTj|KC zH|Sq2Jed@=3!;W|Pix58sZNTP))&>!K!d29V3QtjUlcHE$0SNeP*rrvpeXBBi!@S# zzcHFScOBu8%3zWT12LmyC(7c-GRzY7A2k0V&0qTY^#^0OM@C~cyngdWxO!t{6-ibP zR|uQhM???XCIzC;E&=pEiTD*Fc92>nNS05c5zLrTTvbq-Yxk0^;7D)K4UNA>oMyHa z%pdHcT`S6T)oEg2M+j|8=%kR~5Tf;Lvk+V1_rAjX^c@tG9XQq;C1psD3zYjsqHI-= zCRgG4=By=?JXbM~+7@W4h!GbssN_g{Oy2!z} z#T^3f3{0*-cL=}09fDn?w9x)rMwi@4^dQEyoboAe_sY#dZ#V86Urflk1Z|tlfGRj`tXP=e zAJE7J6xub>9nONT7lLu{X8>BKj#4r(3Ag$uqBvxQDZ%?+JcSju=dCmR?DPWR99J}6 ze_j0uFF0Kazqd17c_?y@>Kqlu4U}+C(xznBN>a5<-3BmTw$rEHnP!4mxk-RXC)ZYx z4KcEbvqQIj#+s-;5tHlDWuWaA*dh(1t0JS$#l?&N0WP#6MBC %s', content_path, store_abspath) + content_path.rename(store_abspath) + if content_path != filepath and filepath.exists(): + # Otherwise we can't replace filepath with a symlink. + filepath.unlink() + log.debug(' symlink %s -> %s', store_abspath, filepath) + filepath.symlink_to(store_abspath) + + if content_path != filepath and content_path.exists(): + log.debug(' unlink: %s', content_path) + content_path.unlink() + + if interrupt_value.value: + log.error('Processing was aborted, not finishing %s', root) + return + + +def main(): + log = logging.getLogger('main') + log.info('starting') + + def interrupt_handler(signal, frame): + with interrupt_value.get_lock(): + if interrupt_value.value == 0: + print('CTRL+C received, will shut down soon') + interrupt_value.value += 1 + + signal.signal(signal.SIGINT, interrupt_handler) + + parser = argparse.ArgumentParser() + parser.add_argument('filestore', type=Path) + parser.add_argument('replacement_target', type=Path, nargs='+') + cli_args = parser.parse_args() + + filestore = cli_args.filestore / 'stored' + assert filestore.exists(), ('%s must exist' % filestore) + + futures = [] + with mp.Pool() as pool: + for path in cli_args.replacement_target: + if interrupt_value.value: + break + + log.info('queueing %s', path) + futures.append(pool.apply_async( + replace_from_path, (filestore, path.resolve()) + )) + pool.close() + + for path, future in zip(cli_args.replacement_target, futures): + try: + future.get() + except: + log.exception('task for path %s was not successful, aborting', path) + interrupt_value.value += 1 + + if interrupt_value.value: + log.error('stopped after abort/error') + raise SystemExit(47) + + log.info('done') + +if __name__ == '__main__': + main() diff --git a/pkg/shaman/_test_file_store/stored/80/b749c27b2fef7255e7e7b3c2029b03b31299c75ff1f1c72732081c70a713a3/7488.blob b/pkg/shaman/_test_file_store/stored/80/b749c27b2fef7255e7e7b3c2029b03b31299c75ff1f1c72732081c70a713a3/7488.blob new file mode 100644 index 
00000000..a3f09c4d --- /dev/null +++ b/pkg/shaman/_test_file_store/stored/80/b749c27b2fef7255e7e7b3c2029b03b31299c75ff1f1c72732081c70a713a3/7488.blob @@ -0,0 +1,230 @@ +#!/usr/bin/env python3.7 + +import argparse +import atexit +from collections import deque +from dataclasses import dataclass +from pathlib import Path +import random +import sys +import typing + +import requests + +import filesystemstuff +import httpstuff + +parser = argparse.ArgumentParser() +parser.add_argument('root', type=Path) +parser.add_argument('shaman_url', type=str) +parser.add_argument('--checkout') +parser.add_argument('--sha-only', default=False, action='store_true') +parser.add_argument('--cleanup', default=False, action='store_true', help='Clean up cache files and exit') +cli_args = parser.parse_args() + +root = cli_args.root.resolve() + +if cli_args.cleanup: + filesystemstuff.cleanup_cache() + raise SystemExit('CLEAN!') + +shaman_url = httpstuff.normalise_url(cli_args.shaman_url) + +session: requests.Session() + +@dataclass +class FileInfo: + checksum: str + filesize: int + abspath: Path + +global_fileinfo = {} + + +def feed_lines() -> typing.Iterable[typing.Tuple[Path, bytes, typing.Optional[Path]]]: + for filepath in filesystemstuff.find_files(root): + content_path, checksum = filesystemstuff.compute_cached_checksum(filepath) + filesize = filepath.stat().st_size + relpath = filepath.relative_to(root) + + global_fileinfo[str(relpath)] = FileInfo( + checksum=checksum, + filesize=filesize, + abspath=filepath, + ) + + file_to_unlink = None if content_path == filepath else content_path + yield relpath, f'{checksum} {filesize} {relpath}\n'.encode('utf8'), file_to_unlink + + +def show_stats(): + print('filesystemstuff stats:') + print(f' computing checksums: {filesystemstuff.TimeInfo.computing_checksums:.3f} seconds') + print(f' handling caching : {filesystemstuff.TimeInfo.checksum_cache_handling:.3f} seconds') + + +def feed(definition_file: bytes, valid_paths: typing.Set[str]) -> typing.Set[str]: + print(f'Feeding {root} to the Shaman') + resp = session.post(f'{shaman_url}checkout/requirements', data=definition_file, stream=True) + if resp.status_code >= 300: + raise SystemExit(f'Error {resp.status_code}: {resp.text}') + + print('==========') + to_upload = deque() + for line in resp.iter_lines(): + response, path = line.decode().split(' ', 1) + print(f'{response}\t{path}') + + if path not in valid_paths: + raise RuntimeError(f'Shaman asked us for path {path!r} which we never offered') + + if response == 'file-unknown': + to_upload.appendleft(path) + elif response == 'already-uploading': + to_upload.append(path) + elif response == 'ERROR': + print(f'ERROR RESPONSE: {path}') + return + else: + print(f'UNKNOWN RESPONSE {response!r} FOR PATH {path!r}') + return + + print('==========') + print(f'Going to upload {len(to_upload)} files') + + failed_paths = upload_files(to_upload) + + if failed_paths: + print('Some files did not upload this iteration:') + for fname in sorted(failed_paths): + print(f' - {fname}') + + return failed_paths + + +def upload_files(to_upload: typing.Deque[str]) -> typing.Set[str]: + failed_paths = set() + deferred_paths = set() + + def defer(some_path: str): + nonlocal to_upload + + print(' - Shaman asked us to defer uploading this file.') + deferred_paths.add(some_path) + + # Instead of deferring this one file, randomize the files to upload. + # This prevents multiple deferrals when someone else is uploading + # files from the same project (because it probably happens alphabetically). 
+ all_files = list(to_upload) + random.shuffle(all_files) + to_upload = deque(all_files) + to_upload.append(some_path) + + MAX_DEFERRED_PATHS = 8 + MAX_FAILED_PATHS = 8 + + while to_upload: + # After too many failures, just retry to get a fresh set of files to upload. + if len(failed_paths) > MAX_FAILED_PATHS: + print('Too many failures, going to abort this iteration') + failed_paths.update(to_upload) + return failed_paths + + path = to_upload.popleft() + fileinfo = global_fileinfo[path] + + headers = { + 'X-Shaman-Original-Filename': path + } + + # Let the Shaman know whether we can defer uploading this file or not. + can_defer = bool(len(deferred_paths) < MAX_DEFERRED_PATHS and path not in deferred_paths and len(to_upload)) + if can_defer: + headers['X-Shaman-Can-Defer-Upload'] = 'true' + + print(f'Uploading {path} ; can_defer={can_defer}') + try: + with fileinfo.abspath.open('rb') as infile: + resp = session.post( + f'{shaman_url}files/{fileinfo.checksum}/{fileinfo.filesize}', + data=infile, headers=headers) + resp.raise_for_status() + + if resp.status_code == 208: + if can_defer: + defer(path) + else: + print(' - Someone else already finished uploading this file.') + + except requests.ConnectionError as ex: + if can_defer: + # Closing the connection with an 'X-Shaman-Can-Defer-Upload: true' header + # indicates that we should defer the upload. + defer(path) + else: + print(f'Error uploading {path}, might retry later: {ex}') + failed_paths.add(path) + else: + failed_paths.discard(path) + + return failed_paths + + +def main(): + global session + + # Get an authentication token. + resp = requests.get(f'{shaman_url}get-token') + resp.raise_for_status() + session = httpstuff.session(token=resp.text) + + paths_to_unlink = set() + def unlink_temp_paths(): + for path in paths_to_unlink: + try: + if path.exists(): + path.unlink() + except Exception as ex: + print(f'Error deleting {path}: {ex}') + + + atexit.register(filesystemstuff.cleanup_cache) + atexit.register(show_stats) + atexit.register(unlink_temp_paths) + + print(f'Creating Shaman definition file from {root}') + allowed_paths = set() + definition_lines = [] + for relpath, line, content_path in feed_lines(): + allowed_paths.add(str(relpath)) + definition_lines.append(line) + paths_to_unlink.add(content_path) + + definition_file = b''.join(definition_lines) + print(f'Computed SHA sums, definition file is {len(definition_file) // 1024} KiB') + sys.stdout.buffer.write(definition_file) + if cli_args.sha_only: + return + + for try_count in range(50): + print(f'========== Upload attempt {try_count+1}') + failed_paths = feed(definition_file, allowed_paths) + if not failed_paths: + break + + print('==========') + if failed_paths: + raise SystemExit('Aborted due to repeated upload failure') + else: + print(f'All files uploaded succesfully in {try_count+1} iterations') + + if cli_args.checkout: + print(f'Going to ask for a checkout with ID {cli_args.checkout}') + resp = session.post(f'{shaman_url}checkout/create/{cli_args.checkout}', data=definition_file) + resp.raise_for_status() + print(f'Received status {resp.status_code}: {resp.text}') + else: + print('Not asking for a checkout, use --checkout if you want this.') + +if __name__ == '__main__': + main() diff --git a/pkg/shaman/_test_file_store/stored/91/4853599dd2c351ab7b82b219aae6e527e51518a667f0ff32244b0c94c75688/486.blob b/pkg/shaman/_test_file_store/stored/91/4853599dd2c351ab7b82b219aae6e527e51518a667f0ff32244b0c94c75688/486.blob new file mode 100644 index 00000000..eb62e46f --- /dev/null 
+++ b/pkg/shaman/_test_file_store/stored/91/4853599dd2c351ab7b82b219aae6e527e51518a667f0ff32244b0c94c75688/486.blob @@ -0,0 +1,18 @@ +import urllib.parse +import requests.adapters + + +def session(token: str): + session = requests.session() + session.headers['Authorization'] = f'Bearer {token}' + session.headers['Content-Type'] = 'text/plain' + + http_adapter = requests.adapters.HTTPAdapter(max_retries=5) + session.mount('https://', http_adapter) + session.mount('http://', http_adapter) + + return session + + +def normalise_url(url: str) -> str: + return urllib.parse.urlunparse(urllib.parse.urlparse(url)) diff --git a/pkg/shaman/_test_file_store/stored/ba/c52223acab283d5fc5160560e617d4c0161690069b3e8b66fba546c47f5388/6664.blob b/pkg/shaman/_test_file_store/stored/ba/c52223acab283d5fc5160560e617d4c0161690069b3e8b66fba546c47f5388/6664.blob new file mode 100644 index 0000000000000000000000000000000000000000..3459df768783bb9785d28792dfe910bd2f0bed8d GIT binary patch literal 6664 zcmZu#OLH98bw0Pdr{{qI2m&M?B1DcJ6gd_NxDDStvU1r~StM0<7TIQJoY5x*|`F+AZ>|QQaob z9@QQ4oKd|*UTIW!$#X~bGI`}uy+U4PRIid(mAXaU8ucc~n;^yeu&G<8-XwXGq@qcB zPP9G6W$4g3?-YfX$(yDSq)t;*qU{-qX6ZS0dxma>pje|XF}6m}vFWn}0#TK==eV*M zPEc5Q3%4*qxQvP?WZZ68MVJc(E>&1Midu7ahAd}6rQE89HF=* zGhY>oa}>^xW}cUJ>}7#A!AY!#RToFS7o^vv?FL1!@|f@y3eQovL|>JJ>}w|n@EELe zp27?Cl`CUjr?5fcYf^uMB1PM8%0@2=LG->s;hV!1nO$!Ux5S&dNa0(f&0Jz6tTmg5 zjFnbXnNk=%9z>`(9|!r?!P0U&Xm3R;*;ddClB?}*93?q_WrJQ8PfrmYE@0;2buIuc zYU3S03bhaLW`H9_kbz)~(S=FHA|-A`$^D?eBac3 zzn6xCF4yb6|9B8|hds0HwAUZxankXz)%Ff=&CHLgz8t{c3X-skA-^R(3F=aO?XPU5 zJsfU-Lr2NVZmM^(e$b9qI_XNL+bf+o?*<#oAiuKFjgm0ZSK}nh<9v{hII`mR_x&MP zHsfxT?Pqz^%kshI=5l{um$B4YyqSZdu3RN8P0KhsO>^VwC?Dvg&3xjAN7;CpiP=A+ z$cdg3breJXnWDYFr>BbaKXJK@MxtjHJ+){Xf5J`-)7RLgJG8i65pusiTHt}gto4u}!BWepqsROu3)-yU}KD9G04?WqF|UY&Xugj2mZu7;96G^GI{W4f=hs+c?Zi zQ;oAQYRA2x+jR6Prd0w$2a$35K&UCTx8iPiBvcJdlBsRRN$872Ucq<9ASHE5RaM=Y zQLa^2v+A^R?Te`E3+RJPDmS;8zM?!J&!40c;vtOVHU6utz0}+H^B(e2TxkWi- z0FGPqVgug}Z419yL>LPbiBCl@-evCZeT!MYU1l+$@tH$U9m*cj$3wwY_!}MS@hw_s z27mI9VOE-wbeYOX6;BFjx=`F`thV0y?Q|)^hKSjMbA#Oe^Oetp(RDv z2iRN2dPa~?-Ck0QkkNTd|2bM<%3a<8PUgaBc1G&NR)8P=U&3qS&90#M8UB^@;!m6b+yGoE_ym8;UX}ZBX2Ix-IZ+Du4R~Bp1Ymz`SYVoV*bTd_kp=&*ejU^g zMM_*;_}faeq~Ar&l)C9|r1c!?n)$9TqiEV=a{$v_sDCf$#O;$XnuoP#z1LBeMWHY5 zf`@+$B3TUusRiX&GwQ6RZ{cfv^a?&$z2xZsjS93K7~vDc;{I4HFiwo)vvn@)GU_nP z4&~5LK^K_8U?|M=-@!NlXYhjP1EVbII|s!-#|9bPE7Nnaw@`j)K9n3r0$6;8DK6<> z@EBAOlXQbT%h5C#2a zDOdt*S0&Vm8u?b#fUSp_Zj7vJBTgDYW5n7xO|AyJFeQ!MV1Kz*Yv8#j(MBed^8ws( zZMQEtMS3VvCOip zq3Zy}Aw#Hj_y?5Dy1s~i;@!kF37#~gS(M70zIEsBU*5RmuijYw-cA4B-MjZWItttmdpOz&!hE60X3E204UJ&-q(FM-^vb%TsqjEG178TZ%Q1IVr82gUx|q6F}kVpZ6Lh*^f6DdI+G$vU*=lZWfc z5;@R0s2Y@Er$+iVDW#wjYo8^G9zuEGVWD*#16drwK$oC+JJ>^)b#D!drS~|rN}z~k z(J+|<_@kTWKotrqNGcHrp+%IE>DCCz9=Aa8%A|i!IR>K_)~-Se&^aW6ywvZd34^L^ z?)VcrF1}o0-WuD?=Hx+eU+*)b1q1>(0YU-jAZvVo?QUxb+V_e8{=K77;aG>RFy1d+ zYQ&q3B+Y?sfGI4HYLspn#Kcb`ok3A0YA|O9hzLg7GnQ}x4urEve0m(Lbs!jdOjQ~= z@)C#B`QpsK-1^UhFaM%BA!gY)+gXbEt(zi#E8ktasdcJN?Lp8TL`B_3Kw{3UetPfT z&DMSYySMM$^dH>3w|4t(%h(7?Oa-8oHBA{cUdWX4bkN>v)=mJ)3L#NSXwyD4&z1xI?fCJieG0H-Jk302Om!^N~Un?iiuV zT63bMA7D4aCjBYb*134d1#2!dnGHyG-&CT#7~)BHjM{|}`T-Al#6`dbM@OdA+d*Dc zIP1^2!3hVwezW}l@&|c3X>5XYn~M{IXrb+wc(XGo2u!zC8NV9;>_vQQ9>Fu^;aFuB zukiOD;K0DjB3Q|At%H*e)270Sf2KHX!jNIciwb-vyg6=uM!6IVWh9!te`9v*Y2ht+ zD83r}Ec`a|WX^l>DS4~pz2c2Ij>l-uuD+mk7AG1cIa7~6med(@#4Ke0O7P7;MmB+# zI`52ULD>8~x~*D(X~M2xK^%KWQTjW|PZ27K1*z-*r6ZctyhQ~V0Zo5`Q=n&k?-Avv z*)DyK@31+Vz9)I&nqXN73-VA_^Qv)@DT4P ze?)@$Q(j#EO66y0`z%A4qZ*i3n^Xdf7kRw+Q86h+I`j4_o36%cHwuzL|GmN&M`7b) 
z*Z``s25wfk&dQ!O=k)g>ZX_%W%Iuf}=Fr(Vl5R3|^gI`wmg`0CDZ^um^_aSWxKQUX zi-nPz&`~$Y<0p}yr~0pW{39N}@X?KjL&wdAYt6rL>!!bUb9D`=YJ!WN9aBNF1N$0< zh{Jmf|G(zRZ}4OX32KU)i6wk^6Vi(dCEO`^c9QOTr;|a?mxu!asvOIkKHkKq)~_

C5AE4mG$$@7^YKyxF7eCxg6t@!AB-&=wq%~_T zp;l8<)-3dD(z2mrQ)&`ky{_~V&_IeYC0Tr={d8kHYUidJi?yPO@q%PDcZBqdZ1iz4ef zc<0t2If(lbMlj&6K*{)KkR#qpUvLX!>)CQghV`TH((*z5Vs`W`*_yF-i>sbivuY;z zZp|<54u+Rs-X_VZbYp{uVcs(}CPx=(m~739s-G+WrG? z#t9nPbJpAH()i-0p)~i$Kl()Bn(CFi)V!>(p#ebyhrD|^E}b-PX-L1tg*d1(ZmK$b zhhU|dUg2iOmgnaCeQup-r^poAjGb(BzX%2~Asge2!}q#kiFBVQvr{xx0P_yWB6sCn zQcOW{^;Nlzo5ujThFk^ecewW|7kr^%>=@TEj4I=7q-nRvA~>El6CA$7Qe=Z}E+L0F zKhaGyH9Q=y&0!z3XWvNn3v`zf3r2obb_hK*zFd5b< ed$x38Zee<%{<^bJJOAcuOK+a4FT3h|<^KSKZ^|bC literal 0 HcmV?d00001 diff --git a/pkg/shaman/_test_file_store/stored/d6/fc7289b5196cc96748ea72f882a22c39b8833b457fe854ef4c03a01f5db0d3/7217.blob b/pkg/shaman/_test_file_store/stored/d6/fc7289b5196cc96748ea72f882a22c39b8833b457fe854ef4c03a01f5db0d3/7217.blob new file mode 100644 index 00000000..a8003a2a --- /dev/null +++ b/pkg/shaman/_test_file_store/stored/d6/fc7289b5196cc96748ea72f882a22c39b8833b457fe854ef4c03a01f5db0d3/7217.blob @@ -0,0 +1,236 @@ +import base64 +import contextlib +import gzip +import hashlib +import json +import logging +import os +import sys +import time +import typing +from collections import deque +from pathlib import Path + +GLOBAL_CACHE_ROOT = Path().home() / '.cache/shaman-client/shasums' +MAX_CACHE_FILES_AGE_SECS = 3600 * 24 * 60 # 60 days +CURRENT_FILE_VERSION = 2 + +log = logging.getLogger(__name__) + + +class TimeInfo: + computing_checksums = 0.0 + checksum_cache_handling = 0.0 + + +def find_files(root: Path) -> typing.Iterable[Path]: + queue = deque([root]) + while queue: + path = queue.popleft() + + # Ignore hidden files/dirs; these can be things like '.svn' or '.git', + # which shouldn't be sent to Shaman. + if path.name.startswith('.'): + continue + + if path.is_dir(): + for child in path.iterdir(): + queue.append(child) + continue + + # Skip .blend1, .blend2, etc. + if path.stem.startswith('.blend') and path.stem[-1].isdecimal(): + continue + + yield path + + +def compute_checksum(filepath: Path) -> str: + blocksize = 32 * 1024 + + log.debug('SHAsumming %s', filepath) + with track_time(TimeInfo, 'computing_checksums'): + hasher = hashlib.sha256() + with filepath.open('rb') as infile: + while True: + block = infile.read(blocksize) + if not block: + break + hasher.update(block) + checksum = hasher.hexdigest() + return checksum + + +def _cache_key(filepath: Path) -> str: + fs_encoding = sys.getfilesystemencoding() + filepath = filepath.absolute() + + # Reverse the directory, because most variation is in the last bytes. + rev_dir = str(filepath.parent)[::-1] + cache_path = '%s%s%s' % (filepath.stem, rev_dir, filepath.suffix) + encoded_path = cache_path.encode(fs_encoding) + cache_key = base64.urlsafe_b64encode(encoded_path).decode().rstrip('=') + + return cache_key + +def chunkstring(string: str, length: int) -> typing.Iterable[str]: + return (string[0+i:length+i] for i in range(0, len(string), length)) + + +def is_compressed_blendfile(filepath: Path) -> bool: + if not filepath.suffix.lower().startswith('.blend'): + return False + + with filepath.open('rb') as blendfile: + magic = blendfile.read(3) + + return magic == b'\x1f\x8b\x08' + + +def compute_cached_checksum(filepath: Path) -> (Path, str): + """Compute the SHA256 checksum in a compression-aware way. + + Returns the tuple `(content_path, checksum)`, where + `content_path` is either the path to the decompressed file (if + any) or the filepath itself. + + The caller is responsible for removing the decompressed file. 
+ """ + + with track_time(TimeInfo, 'checksum_cache_handling'): + cache_key = _cache_key(filepath) + is_compressed = is_compressed_blendfile(filepath) + + # Don't create filenames that are longer than 255 characters. + last_parts = Path(*chunkstring(cache_key[10:], 255)) + cache_path = GLOBAL_CACHE_ROOT / cache_key[:10] / last_parts + current_stat = filepath.stat() + + checksum = parse_cache_file(cache_path, current_stat, is_compressed) + if checksum: + return filepath, checksum + + # Determine which path we want to checksum. + if is_compressed: + content_path = decompress(filepath) + else: + content_path = filepath + + checksum = compute_checksum(content_path) + + with track_time(TimeInfo, 'checksum_cache_handling'): + write_cache_file(cache_path, current_stat, is_compressed, checksum) + + return content_path, checksum + +def parse_cache_file(cache_path: Path, current_stat: os.stat_result, is_compressed: bool) -> str: + """Try to parse the cache file as JSON. + + :return: the cached checksum, or '' if not cached. + """ + + try: + with cache_path.open('r') as cache_file: + payload = json.load(cache_file) + except (OSError, ValueError): + # File may not exist, or have invalid contents. + return '' + + file_version = payload.get('version', 1) + if file_version < CURRENT_FILE_VERSION: + return '' + + checksum_key = 'uncompressed_checksum' if is_compressed else 'checksum' + checksum = payload.get(checksum_key, '') + cached_mtime = payload.get('file_mtime', 0.0) + cached_size = payload.get('file_size', 0) + + if checksum \ + and abs(cached_mtime - current_stat.st_mtime) < 0.01 \ + and current_stat.st_size == cached_size: + cache_path.touch() + return checksum + +def write_cache_file(cache_path: Path, current_stat: os.stat_result, is_compressed: bool, checksum: str) -> str: + checksum_key = 'uncompressed_checksum' if is_compressed else 'checksum' + payload = { + 'version': CURRENT_FILE_VERSION, + checksum_key: checksum, + 'file_mtime': current_stat.st_mtime, + 'file_size': current_stat.st_size, + 'is_compressed': is_compressed, + } + + cache_path.parent.mkdir(parents=True, exist_ok=True) + with cache_path.open('w') as cache_file: + json.dump(payload, cache_file) + + +def cleanup_cache(): + if not GLOBAL_CACHE_ROOT.exists(): + return + + with track_time(TimeInfo, 'checksum_cache_handling'): + queue = deque([GLOBAL_CACHE_ROOT]) + rmdir_queue = [] + + now = time.time() + num_removed_files = 0 + num_removed_dirs = 0 + while queue: + path = queue.popleft() + + if path.is_dir(): + for child in path.iterdir(): + queue.append(child) + + rmdir_queue.append(path) + continue + + assert path.is_file() + path.relative_to(GLOBAL_CACHE_ROOT) + + age = now - path.stat().st_mtime + # Don't trust files from the future either. 
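+            # (A negative age means the mtime lies in the future; such files
+            # are not trusted and are deleted as well.)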
+ if 0 <= age <= MAX_CACHE_FILES_AGE_SECS: + continue + + path.unlink() + num_removed_files += 1 + + for dirpath in reversed(rmdir_queue): + assert dirpath.is_dir() + dirpath.relative_to(GLOBAL_CACHE_ROOT) + + try: + dirpath.rmdir() + num_removed_dirs += 1 + except OSError: + pass + + if num_removed_dirs or num_removed_files: + log.info('Cache Cleanup: removed %d dirs and %d files', num_removed_dirs, num_removed_files) + + +@contextlib.contextmanager +def track_time(tracker_object: typing.Any, attribute: str): + start_time = time.time() + yield + duration = time.time() - start_time + tracked_so_far = getattr(tracker_object, attribute, 0.0) + setattr(tracker_object, attribute, tracked_so_far + duration) + + +def decompress(filepath: Path) -> Path: + """Gunzip the file, returning '{filepath}.gunzipped'.""" + + decomppath = filepath.with_suffix('%s.gunzipped' % filepath.suffix) + + if not decomppath.exists() or filepath.stat().st_mtime >= decomppath.stat().st_mtime: + with gzip.open(str(filepath), 'rb') as infile, decomppath.open('wb') as outfile: + while True: + block = infile.read(32768) + if not block: + break + outfile.write(block) + return decomppath diff --git a/pkg/shaman/_test_file_store/stored/dc/89f15de821ad1df3e78f8ef455e653a2d1862f2eb3f5ee78aa4ca68eb6fb35/781.blob b/pkg/shaman/_test_file_store/stored/dc/89f15de821ad1df3e78f8ef455e653a2d1862f2eb3f5ee78aa4ca68eb6fb35/781.blob new file mode 100644 index 0000000000000000000000000000000000000000..d3c510001340756e703b8ca1253156c3cd1c4327 GIT binary patch literal 781 zcmYjO&1xGl5SFyRb=D?W*6tyYoDMg z*vGz-u7yHgp{I`Insz{=Z$_H==I3%a93YTQ(tZ4e5&G$jqXYonz|k*3al~mpFuT1xLRHWzY`q z(F5jqi@u@nWQ#Y1lL_L1N8tp1Y=R9?@c1i!fcIU*lP%d3j66<{7-*kO`{YtECDaQ$ zylBn3Rz=TDQOkZ{#J!Kjg*MtIT4)VfZL$`O3#Dz!IBSeUC)cytyNiQmqpEJD$pb}zL?xi+nM=6% z`~3$}890A`pzFF4RCh}yB>h~gJKeCAptm*EYDI4gQ?liFRabOb3JHwoh18}nt%0C5 ztC*yVrdzDa0@|X!c+=k8jGNArx)${a6v8P!#RD?J8P14*%ZUwIRoWPOZ`Hp|xsACv zzp3W{U=MglybiW#kN0lTc$$YchWN5r+Nfbl3p)U_l?R*x_9(~772w-Fs3DkkC~H-* zve04yKhMCW-KY>&jxtYdcE~;UNz^Gg=V!+~jE_TCG5Bqujq)w|*#DT^h?S>HCsobc QQoMHEX&05^G#F)n06Y}RLjV8( literal 0 HcmV?d00001 diff --git a/pkg/shaman/_test_file_store/stored/e7/fd2d9b2a7054baea5d776def36ba908b9857d49cee3e4746ad671abb02d23f/7459.blob b/pkg/shaman/_test_file_store/stored/e7/fd2d9b2a7054baea5d776def36ba908b9857d49cee3e4746ad671abb02d23f/7459.blob new file mode 100644 index 00000000..6676ac9c --- /dev/null +++ b/pkg/shaman/_test_file_store/stored/e7/fd2d9b2a7054baea5d776def36ba908b9857d49cee3e4746ad671abb02d23f/7459.blob @@ -0,0 +1,229 @@ +#!/usr/bin/env python3.7 + +import argparse +import atexit +from collections import deque +from dataclasses import dataclass +from pathlib import Path +import random +import typing + +import requests + +import filesystemstuff +import httpstuff + +parser = argparse.ArgumentParser() +parser.add_argument('root', type=Path) +parser.add_argument('shaman_url', type=str) +parser.add_argument('--checkout') +parser.add_argument('--sha-only', default=False, action='store_true') +parser.add_argument('--cleanup', default=False, action='store_true', help='Clean up cache files and exit') +cli_args = parser.parse_args() + +root = cli_args.root.resolve() + +if cli_args.cleanup: + filesystemstuff.cleanup_cache() + raise SystemExit('CLEAN!') + +shaman_url = httpstuff.normalise_url(cli_args.shaman_url) + +session: requests.Session() + +@dataclass +class FileInfo: + checksum: str + filesize: int + abspath: Path + +global_fileinfo = {} + + +def feed_lines() -> typing.Iterable[typing.Tuple[Path, bytes, 
typing.Optional[Path]]]: + for filepath in filesystemstuff.find_files(root): + content_path, checksum = filesystemstuff.compute_cached_checksum(filepath) + filesize = filepath.stat().st_size + relpath = filepath.relative_to(root) + + global_fileinfo[str(relpath)] = FileInfo( + checksum=checksum, + filesize=filesize, + abspath=filepath, + ) + + file_to_unlink = None if content_path == filepath else content_path + yield relpath, f'{checksum} {filesize} {relpath}\n'.encode('utf8'), file_to_unlink + + +def show_stats(): + print('filesystemstuff stats:') + print(f' computing checksums: {filesystemstuff.TimeInfo.computing_checksums:.3f} seconds') + print(f' handling caching : {filesystemstuff.TimeInfo.checksum_cache_handling:.3f} seconds') + + +def feed(definition_file: bytes, valid_paths: typing.Set[str]) -> typing.Set[str]: + print(f'Feeding {root} to the Shaman') + resp = session.post(f'{shaman_url}checkout/requirements', data=definition_file, stream=True) + if resp.status_code >= 300: + raise SystemExit(f'Error {resp.status_code}: {resp.text}') + + print('==========') + to_upload = deque() + for line in resp.iter_lines(): + response, path = line.decode().split(' ', 1) + print(f'{response}\t{path}') + + if path not in valid_paths: + raise RuntimeError(f'Shaman asked us for path {path!r} which we never offered') + + if response == 'file-unknown': + to_upload.appendleft(path) + elif response == 'already-uploading': + to_upload.append(path) + elif response == 'ERROR': + print(f'ERROR RESPONSE: {path}') + return + else: + print(f'UNKNOWN RESPONSE {response!r} FOR PATH {path!r}') + return + + print('==========') + print(f'Going to upload {len(to_upload)} files') + + failed_paths = upload_files(to_upload) + + if failed_paths: + print('Some files did not upload this iteration:') + for fname in sorted(failed_paths): + print(f' - {fname}') + + return failed_paths + + +def upload_files(to_upload: typing.Deque[str]) -> typing.Set[str]: + failed_paths = set() + deferred_paths = set() + + def defer(some_path: str): + nonlocal to_upload + + print(' - Shaman asked us to defer uploading this file.') + deferred_paths.add(some_path) + + # Instead of deferring this one file, randomize the files to upload. + # This prevents multiple deferrals when someone else is uploading + # files from the same project (because it probably happens alphabetically). + all_files = list(to_upload) + random.shuffle(all_files) + to_upload = deque(all_files) + to_upload.append(some_path) + + MAX_DEFERRED_PATHS = 8 + MAX_FAILED_PATHS = 8 + + while to_upload: + # After too many failures, just retry to get a fresh set of files to upload. + if len(failed_paths) > MAX_FAILED_PATHS: + print('Too many failures, going to abort this iteration') + failed_paths.update(to_upload) + return failed_paths + + path = to_upload.popleft() + fileinfo = global_fileinfo[path] + + headers = { + 'X-Shaman-Original-Filename': path + } + + # Let the Shaman know whether we can defer uploading this file or not. 
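+        # Deferral is only offered while the deferral cap has not been hit,
+        # this path has not been deferred before, and other files remain to
+        # upload in the meantime.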
+ can_defer = bool(len(deferred_paths) < MAX_DEFERRED_PATHS and path not in deferred_paths and len(to_upload)) + if can_defer: + headers['X-Shaman-Can-Defer-Upload'] = 'true' + + print(f'Uploading {path} ; can_defer={can_defer}') + try: + with fileinfo.abspath.open('rb') as infile: + resp = session.post( + f'{shaman_url}files/{fileinfo.checksum}/{fileinfo.filesize}', + data=infile, headers=headers) + resp.raise_for_status() + + if resp.status_code == 208: + if can_defer: + defer(path) + else: + print(' - Someone else already finished uploading this file.') + + except requests.ConnectionError as ex: + if can_defer: + # Closing the connection with an 'X-Shaman-Can-Defer-Upload: true' header + # indicates that we should defer the upload. + defer(path) + else: + print(f'Error uploading {path}, might retry later: {ex}') + failed_paths.add(path) + else: + failed_paths.discard(path) + + return failed_paths + + +def main(): + global session + + # Get an authentication token. + resp = requests.get(f'{shaman_url}get-token') + resp.raise_for_status() + session = httpstuff.session(token=resp.text) + + paths_to_unlink = set() + def unlink_temp_paths(): + for path in paths_to_unlink: + try: + if path.exists(): + path.unlink() + except Exception as ex: + print(f'Error deleting {path}: {ex}') + + + atexit.register(filesystemstuff.cleanup_cache) + atexit.register(show_stats) + atexit.register(unlink_temp_paths) + + print(f'Creating Shaman definition file from {root}') + allowed_paths = set() + definition_lines = [] + for relpath, line, content_path in feed_lines(): + allowed_paths.add(str(relpath)) + definition_lines.append(line) + paths_to_unlink.add(content_path) + + definition_file = b''.join(definition_lines) + print(f'Computed SHA sums, definition file is {len(definition_file) // 1024} KiB') + print(definition_file) + if cli_args.sha_only: + return + + for try_count in range(50): + print(f'========== Upload attempt {try_count+1}') + failed_paths = feed(definition_file, allowed_paths) + if not failed_paths: + break + + print('==========') + if failed_paths: + raise SystemExit('Aborted due to repeated upload failure') + else: + print(f'All files uploaded succesfully in {try_count+1} iterations') + + if cli_args.checkout: + print(f'Going to ask for a checkout with ID {cli_args.checkout}') + resp = session.post(f'{shaman_url}checkout/create/{cli_args.checkout}', data=definition_file) + resp.raise_for_status() + print(f'Received status {resp.status_code}: {resp.text}') + else: + print('Not asking for a checkout, use --checkout if you want this.') + +if __name__ == '__main__': + main() diff --git a/pkg/shaman/checkout/checkout_id.go b/pkg/shaman/checkout/checkout_id.go new file mode 100644 index 00000000..eea85e2c --- /dev/null +++ b/pkg/shaman/checkout/checkout_id.go @@ -0,0 +1,31 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package checkout + +import "regexp" + +var validCheckoutRegexp = regexp.MustCompile("^[a-zA-Z0-9_]+$") + +func isValidCheckoutID(checkoutID string) bool { + return validCheckoutRegexp.MatchString(checkoutID) +} diff --git a/pkg/shaman/checkout/definition.go b/pkg/shaman/checkout/definition.go new file mode 100644 index 00000000..335b1f74 --- /dev/null +++ b/pkg/shaman/checkout/definition.go @@ -0,0 +1,168 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package checkout + +import ( + "bufio" + "context" + "fmt" + "io" + "path" + "regexp" + "strconv" + "strings" + + "github.com/sirupsen/logrus" +) + +/* Checkout Definition files contain a line for each to-be-checked-out file. + * Each line consists of three fields: checksum, file size, path in the checkout. + */ + +// FileInvalidError is returned when there is an invalid line in a checkout definition file. +type FileInvalidError struct { + lineNumber int // base-1 line number that's bad + innerErr error + reason string +} + +func (cfie FileInvalidError) Error() string { + return fmt.Sprintf("invalid line %d: %s", cfie.lineNumber, cfie.reason) +} + +// DefinitionLine is a single line in a checkout definition file. +type DefinitionLine struct { + Checksum string + FileSize int64 + FilePath string +} + +// DefinitionReader reads and parses a checkout definition +type DefinitionReader struct { + ctx context.Context + channel chan *DefinitionLine + reader *bufio.Reader + + Err error + LineNumber int +} + +var ( + // This is a wider range than used in SHA256 sums, but there is no harm in accepting a few more ASCII letters. + validChecksumRegexp = regexp.MustCompile("^[a-zA-Z0-9]{16,}$") +) + +// NewDefinitionReader creates a new DefinitionReader for the given reader. 
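+//
+// Illustrative use by a hypothetical caller (ctx and requestBody are
+// placeholders, not names defined in this package):
+//
+//    reader := NewDefinitionReader(ctx, requestBody)
+//    for line := range reader.Read() {
+//        // line.Checksum, line.FileSize and line.FilePath are available here.
+//    }
+//    if reader.Err != nil {
+//        // The definition was invalid, or ctx was cancelled.
+//    }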
+func NewDefinitionReader(ctx context.Context, reader io.Reader) *DefinitionReader { + return &DefinitionReader{ + ctx: ctx, + channel: make(chan *DefinitionLine), + reader: bufio.NewReader(reader), + } +} + +// Read spins up a new goroutine for parsing the checkout definition. +// The returned channel will receive definition lines. +func (fr *DefinitionReader) Read() <-chan *DefinitionLine { + go func() { + defer close(fr.channel) + defer logrus.Debug("done reading request") + + for { + line, err := fr.reader.ReadString('\n') + if err != nil && err != io.EOF { + fr.Err = FileInvalidError{ + lineNumber: fr.LineNumber, + innerErr: err, + reason: fmt.Sprintf("I/O error: %v", err), + } + return + } + if err == io.EOF && line == "" { + return + } + + if contextError := fr.ctx.Err(); contextError != nil { + fr.Err = fr.ctx.Err() + return + } + + fr.LineNumber++ + logrus.WithFields(logrus.Fields{ + "line": line, + "number": fr.LineNumber, + }).Debug("read line") + + line = strings.TrimSpace(line) + if line == "" { + continue + } + + definitionLine, err := fr.parseLine(line) + if err != nil { + fr.Err = err + return + } + + fr.channel <- definitionLine + } + }() + + return fr.channel +} + +func (fr *DefinitionReader) parseLine(line string) (*DefinitionLine, error) { + + parts := strings.SplitN(strings.TrimSpace(line), " ", 3) + if len(parts) != 3 { + return nil, FileInvalidError{ + lineNumber: fr.LineNumber, + reason: fmt.Sprintf("line should consist of three space-separated parts, not %d: %v", + len(parts), line), + } + } + + checksum := parts[0] + if !validChecksumRegexp.MatchString(checksum) { + return nil, FileInvalidError{fr.LineNumber, nil, "invalid checksum"} + } + + fileSize, err := strconv.ParseInt(parts[1], 10, 64) + if err != nil { + return nil, FileInvalidError{fr.LineNumber, err, "invalid file size"} + } + + filePath := strings.TrimSpace(parts[2]) + if path.IsAbs(filePath) { + return nil, FileInvalidError{fr.LineNumber, err, "no absolute paths allowed"} + } + if filePath != path.Clean(filePath) || strings.Contains(filePath, "..") { + return nil, FileInvalidError{fr.LineNumber, err, "paths must be clean and not have any .. in them."} + } + + return &DefinitionLine{ + Checksum: parts[0], + FileSize: fileSize, + FilePath: filePath, + }, nil +} diff --git a/pkg/shaman/checkout/definition_test.go b/pkg/shaman/checkout/definition_test.go new file mode 100644 index 00000000..58a2039a --- /dev/null +++ b/pkg/shaman/checkout/definition_test.go @@ -0,0 +1,86 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package checkout + +import ( + "bytes" + "context" + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDefinitionReader(t *testing.T) { + file, err := os.Open("definition_test_example.txt") + if err != nil { + panic(err) + } + defer file.Close() + + ctx, cancelFunc := context.WithCancel(context.Background()) + reader := NewDefinitionReader(ctx, file) + readChan := reader.Read() + + line := <-readChan + assert.Equal(t, "35b0491c27b0333d1fb45fc0789a12ca06b1d640d2569780b807de504d7029e0", line.Checksum) + assert.Equal(t, int64(1424), line.FileSize) + assert.Equal(t, "definition.go", line.FilePath) + + line = <-readChan + assert.Equal(t, "63b72c63b9424fd13b9370fb60069080c3a15717cf3ad442635b187c6a895079", line.Checksum) + assert.Equal(t, int64(127), line.FileSize) + assert.Equal(t, "logging.go", line.FilePath) + assert.Nil(t, reader.Err) + + // Cancelling is only found out after the next read. + cancelFunc() + line = <-readChan + assert.Nil(t, line) + assert.Equal(t, context.Canceled, reader.Err) + assert.Equal(t, 2, reader.LineNumber) +} + +func TestDefinitionReaderBadRequests(t *testing.T) { + ctx := context.Background() + + testRejects := func(checksum, path string) { + buffer := bytes.NewReader([]byte(checksum + " 30 " + path)) + reader := NewDefinitionReader(ctx, buffer) + readChan := reader.Read() + + var line *DefinitionLine + line = <-readChan + assert.Nil(t, line) + assert.NotNil(t, reader.Err) + assert.Equal(t, 1, reader.LineNumber) + } + + testRejects("35b0491c27b0333d1fb45fc0789a12c", "/etc/passwd") // absolute + testRejects("35b0491c27b0333d1fb45fc0789a12c", "../../../../../../etc/passwd") // ../ in there that path.Clean() will keep + testRejects("35b0491c27b0333d1fb45fc0789a12c", "some/path/../etc/passwd") // ../ in there that path.Clean() will remove + + testRejects("35b", "some/path") // checksum way too short + testRejects("35b0491c.7b0333d1fb45fc0789a12c", "some/path") // checksum invalid + testRejects("35b0491c/7b0333d1fb45fc0789a12c", "some/path") // checksum invalid +} diff --git a/pkg/shaman/checkout/definition_test_example.txt b/pkg/shaman/checkout/definition_test_example.txt new file mode 100644 index 00000000..9d781163 --- /dev/null +++ b/pkg/shaman/checkout/definition_test_example.txt @@ -0,0 +1,5 @@ +35b0491c27b0333d1fb45fc0789a12ca06b1d640d2569780b807de504d7029e0 1424 definition.go +63b72c63b9424fd13b9370fb60069080c3a15717cf3ad442635b187c6a895079 127 logging.go +9f1470441beb98dbb66e3339e7da697d9c2312999a6a5610c461cbf55040e210 795 manager.go +59c6bd72af62aa860343adcafd46e3998934a9db2997ce08514b4361f099fa58 1134 routes.go +59c6bd72af62aa860343adcafd46e3998934a9db2997ce08514b4361f099fa58 1134 another-routes.go diff --git a/pkg/shaman/checkout/logging.go b/pkg/shaman/checkout/logging.go new file mode 100644 index 00000000..84a83858 --- /dev/null +++ b/pkg/shaman/checkout/logging.go @@ -0,0 +1,29 @@ +/* (c) 2019, Blender Foundation - Sybren A. 
Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package checkout + +import ( + "github.com/sirupsen/logrus" +) + +var packageLogger = logrus.WithField("package", "shaman/checkout") diff --git a/pkg/shaman/checkout/manager.go b/pkg/shaman/checkout/manager.go new file mode 100644 index 00000000..43f7face --- /dev/null +++ b/pkg/shaman/checkout/manager.go @@ -0,0 +1,237 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package checkout + +import ( + "errors" + "os" + "path" + "path/filepath" + "sync" + "time" + + "github.com/sirupsen/logrus" + + "git.blender.org/flamenco/pkg/shaman/config" + "git.blender.org/flamenco/pkg/shaman/filestore" + "git.blender.org/flamenco/pkg/shaman/touch" +) + +// Manager creates checkouts and provides info about missing files. +type Manager struct { + checkoutBasePath string + fileStore filestore.Storage + + wg sync.WaitGroup +} + +// ResolvedCheckoutInfo contains the result of validating the Checkout ID and parsing it into a final path. +type ResolvedCheckoutInfo struct { + // The absolute path on our filesystem. + absolutePath string + // The path relative to the Manager.checkoutBasePath. This is what is + // sent back to the client. + RelativePath string +} + +// Errors returned by the Checkout Manager. 
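+// The HTTP handlers in routes.go map ErrInvalidCheckoutID to 400 Bad Request
+// and ErrCheckoutAlreadyExists to 409 Conflict.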
+var ( + ErrCheckoutAlreadyExists = errors.New("A checkout with this ID already exists") + ErrInvalidCheckoutID = errors.New("The Checkout ID is invalid") +) + +// NewManager creates and returns a new Checkout Manager. +func NewManager(conf config.Config, fileStore filestore.Storage) *Manager { + logger := packageLogger.WithField("checkoutDir", conf.CheckoutPath) + logger.Info("opening checkout directory") + + err := os.MkdirAll(conf.CheckoutPath, 0777) + if err != nil { + logger.WithError(err).Fatal("unable to create checkout directory") + } + + return &Manager{conf.CheckoutPath, fileStore, sync.WaitGroup{}} +} + +// Close waits for still-running touch() calls to finish, then returns. +func (m *Manager) Close() { + packageLogger.Info("shutting down Checkout manager") + m.wg.Wait() +} + +func (m *Manager) pathForCheckoutID(checkoutID string) (ResolvedCheckoutInfo, error) { + if !isValidCheckoutID(checkoutID) { + return ResolvedCheckoutInfo{}, ErrInvalidCheckoutID + } + + // When changing the number of path components the checkout ID is turned into, + // be sure to also update the EraseCheckout() function for this. + + // We're expecting ObjectIDs as checkoutIDs, which means most variation + // is in the last characters. + lastBitIndex := len(checkoutID) - 2 + relativePath := path.Join(checkoutID[lastBitIndex:], checkoutID) + + return ResolvedCheckoutInfo{ + absolutePath: path.Join(m.checkoutBasePath, relativePath), + RelativePath: relativePath, + }, nil +} + +// PrepareCheckout creates the root directory for a specific checkout. +// Returns the path relative to the checkout root directory. +func (m *Manager) PrepareCheckout(checkoutID string) (ResolvedCheckoutInfo, error) { + checkoutPaths, err := m.pathForCheckoutID(checkoutID) + if err != nil { + return ResolvedCheckoutInfo{}, err + } + + logger := logrus.WithFields(logrus.Fields{ + "checkoutPath": checkoutPaths.absolutePath, + "checkoutID": checkoutID, + }) + + if stat, err := os.Stat(checkoutPaths.absolutePath); !os.IsNotExist(err) { + if err == nil { + if stat.IsDir() { + logger.Debug("checkout path exists") + } else { + logger.Error("checkout path exists but is not a directory") + } + // No error stat'ing this path, indicating it's an existing checkout. + return ResolvedCheckoutInfo{}, ErrCheckoutAlreadyExists + } + // If it's any other error, it's really a problem on our side. + logger.WithError(err).Error("unable to stat checkout directory") + return ResolvedCheckoutInfo{}, err + } + + if err := os.MkdirAll(checkoutPaths.absolutePath, 0777); err != nil { + logger.WithError(err).Fatal("unable to create checkout directory") + } + + logger.WithField("relPath", checkoutPaths.RelativePath).Info("created checkout directory") + return checkoutPaths, nil +} + +// EraseCheckout removes the checkout directory structure identified by the ID. +func (m *Manager) EraseCheckout(checkoutID string) error { + checkoutPaths, err := m.pathForCheckoutID(checkoutID) + if err != nil { + return err + } + + logger := logrus.WithFields(logrus.Fields{ + "checkoutPath": checkoutPaths.absolutePath, + "checkoutID": checkoutID, + }) + if err := os.RemoveAll(checkoutPaths.absolutePath); err != nil { + logger.WithError(err).Error("unable to remove checkout directory") + return err + } + + // Try to remove the parent path as well, to not keep the dangling two-letter dirs. + // Failure is fine, though, because there is no guarantee it's empty anyway. 
+ os.Remove(path.Dir(checkoutPaths.absolutePath)) + logger.Info("removed checkout directory") + return nil +} + +// SymlinkToCheckout creates a symlink at symlinkPath to blobPath. +// It does *not* do any validation of the validity of the paths! +func (m *Manager) SymlinkToCheckout(blobPath, checkoutPath, symlinkRelativePath string) error { + symlinkPath := path.Join(checkoutPath, symlinkRelativePath) + logger := logrus.WithFields(logrus.Fields{ + "blobPath": blobPath, + "symlinkPath": symlinkPath, + }) + + blobPath, err := filepath.Abs(blobPath) + if err != nil { + logger.WithError(err).Error("unable to make blobPath absolute") + return err + } + + logger.Debug("creating symlink") + + // This is expected to fail sometimes, because we don't create parent directories yet. + // We only create those when we get a failure from symlinking. + err = os.Symlink(blobPath, symlinkPath) + if err == nil { + return err + } + if !os.IsNotExist(err) { + logger.WithError(err).Error("unable to create symlink") + return err + } + + logger.Debug("creating parent directory") + + dir := path.Dir(symlinkPath) + if err := os.MkdirAll(dir, 0777); err != nil { + logger.WithError(err).Error("unable to create parent directory") + return err + } + + if err := os.Symlink(blobPath, symlinkPath); err != nil { + logger.WithError(err).Error("unable to create symlink, after creating parent directory") + return err + } + + // Change the modification time of the blob to mark it as 'referenced' just now. + m.wg.Add(1) + go func() { + touchFile(blobPath) + m.wg.Done() + }() + + return nil +} + +// touchFile changes the modification time of the blob to mark it as 'referenced' just now. +func touchFile(blobPath string) error { + if blobPath == "" { + return os.ErrInvalid + } + now := time.Now() + + logger := logrus.WithField("file", blobPath) + logger.Debug("touching") + + err := touch.Touch(blobPath) + logLevel := logrus.DebugLevel + if err != nil { + logger = logger.WithError(err) + logLevel = logrus.WarnLevel + } + + duration := time.Now().Sub(now) + logger = logger.WithField("duration", duration) + if duration < 1*time.Second { + logger.Log(logLevel, "done touching") + } else { + logger.Log(logLevel, "done touching but took a long time") + } + + return err +} diff --git a/pkg/shaman/checkout/manager_test.go b/pkg/shaman/checkout/manager_test.go new file mode 100644 index 00000000..0cb45fc7 --- /dev/null +++ b/pkg/shaman/checkout/manager_test.go @@ -0,0 +1,76 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package checkout + +import ( + "io/ioutil" + "os" + "path" + "testing" + "time" + + "git.blender.org/flamenco/pkg/shaman/config" + "git.blender.org/flamenco/pkg/shaman/filestore" + "github.com/stretchr/testify/assert" +) + +func createTestManager() (*Manager, func()) { + conf, confCleanup := config.CreateTestConfig() + fileStore := filestore.New(conf) + manager := NewManager(conf, fileStore) + return manager, confCleanup +} + +func TestSymlinkToCheckout(t *testing.T) { + manager, cleanup := createTestManager() + defer cleanup() + + // Fake an older file. + blobPath := path.Join(manager.checkoutBasePath, "jemoeder.blob") + err := ioutil.WriteFile(blobPath, []byte("op je hoofd"), 0600) + assert.Nil(t, err) + + wayBackWhen := time.Now().Add(-time.Hour * 24 * 100) + err = os.Chtimes(blobPath, wayBackWhen, wayBackWhen) + assert.Nil(t, err) + + symlinkRelativePath := "path/to/jemoeder.txt" + err = manager.SymlinkToCheckout(blobPath, manager.checkoutBasePath, symlinkRelativePath) + assert.Nil(t, err) + + // Wait for touch() calls to be done. + manager.wg.Wait() + + // The blob should have been touched to indicate it was referenced just now. + stat, err := os.Stat(blobPath) + assert.Nil(t, err) + assert.True(t, + stat.ModTime().After(wayBackWhen), + "File must be touched (%v must be later than %v)", stat.ModTime(), wayBackWhen) + + symlinkPath := path.Join(manager.checkoutBasePath, symlinkRelativePath) + stat, err = os.Lstat(symlinkPath) + assert.Nil(t, err) + assert.True(t, stat.Mode()&os.ModeType == os.ModeSymlink, + "%v should be a symlink", symlinkPath) +} diff --git a/pkg/shaman/checkout/routes.go b/pkg/shaman/checkout/routes.go new file mode 100644 index 00000000..c34de038 --- /dev/null +++ b/pkg/shaman/checkout/routes.go @@ -0,0 +1,191 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package checkout + +import ( + "fmt" + "net/http" + "strings" + + "git.blender.org/flamenco/pkg/shaman/filestore" + "git.blender.org/flamenco/pkg/shaman/httpserver" + + "git.blender.org/flamenco/pkg/shaman/jwtauth" + "github.com/gorilla/mux" + "github.com/sirupsen/logrus" +) + +// Responses for each line of a checkout definition file. 
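+// Each reported line has the form "<response> <path>", for example
+// "file-unknown definition.go". Files that are already stored produce no
+// response line; they are only touched so the garbage collector keeps them.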
+const ( + responseFileUnkown = "file-unknown" + responseAlreadyUploading = "already-uploading" + responseError = "ERROR" +) + +// AddRoutes adds HTTP routes to the muxer. +func (m *Manager) AddRoutes(router *mux.Router, auther jwtauth.Authenticator) { + router.Handle("/checkout/requirements", auther.WrapFunc(m.reportRequirements)).Methods("POST") + router.Handle("/checkout/create/{checkoutID}", auther.WrapFunc(m.createCheckout)).Methods("POST") +} + +func (m *Manager) reportRequirements(w http.ResponseWriter, r *http.Request) { + logger := packageLogger.WithFields(jwtauth.RequestLogFields(r)) + logger.Debug("user requested checkout requirements") + + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + if r.Header.Get("Content-Type") != "text/plain" { + http.Error(w, "Expecting text/plain content type", http.StatusBadRequest) + return + } + + bodyReader, err := httpserver.DecompressedReader(r) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + defer bodyReader.Close() + + // Unfortunately, Golang doesn't allow us (for good reason) to send a reply while + // still reading the response. See https://github.com/golang/go/issues/4637 + responseLines := []string{} + alreadyRequested := map[string]bool{} + reader := NewDefinitionReader(r.Context(), bodyReader) + for line := range reader.Read() { + fileKey := fmt.Sprintf("%s/%d", line.Checksum, line.FileSize) + if alreadyRequested[fileKey] { + // User asked for this (checksum, filesize) tuple already. + continue + } + + path, status := m.fileStore.ResolveFile(line.Checksum, line.FileSize, filestore.ResolveEverything) + + response := "" + switch status { + case filestore.StatusDoesNotExist: + // Caller can upload this file immediately. + response = responseFileUnkown + case filestore.StatusUploading: + // Caller should postpone uploading this file until all 'does-not-exist' files have been uploaded. + response = responseAlreadyUploading + case filestore.StatusStored: + // We expect this file to be sent soon, though, so we need to + // 'touch' it to make sure it won't be GC'd in the mean time. + go touchFile(path) + + // Only send a response when the caller needs to do something. + continue + default: + logger.WithFields(logrus.Fields{ + "path": path, + "status": status, + "checksum": line.Checksum, + "filesize": line.FileSize, + }).Error("invalid status returned by ResolveFile") + continue + } + + alreadyRequested[fileKey] = true + responseLines = append(responseLines, fmt.Sprintf("%s %s\n", response, line.FilePath)) + } + if reader.Err != nil { + logger.WithError(reader.Err).Warning("error reading checkout definition") + http.Error(w, fmt.Sprintf("%s %v\n", responseError, reader.Err), http.StatusBadRequest) + return + } + + w.WriteHeader(http.StatusOK) + w.Write([]byte(strings.Join(responseLines, ""))) +} + +func (m *Manager) createCheckout(w http.ResponseWriter, r *http.Request) { + checkoutID := mux.Vars(r)["checkoutID"] + + logger := packageLogger.WithFields(jwtauth.RequestLogFields(r)).WithField("checkoutID", checkoutID) + logger.Debug("user requested checkout creation") + + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + if r.Header.Get("Content-Type") != "text/plain" { + http.Error(w, "Expecting text/plain content type", http.StatusBadRequest) + return + } + bodyReader, err := httpserver.DecompressedReader(r) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + defer bodyReader.Close() + + // Actually create the checkout. 
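+	// PrepareCheckout() validates the ID and creates the checkout directory;
+	// the rest of this handler symlinks every already-stored file into it and
+	// reports the files that still have to be uploaded first.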
+ resolvedCheckoutInfo, err := m.PrepareCheckout(checkoutID) + if err != nil { + switch err { + case ErrInvalidCheckoutID: + http.Error(w, fmt.Sprintf("invalid checkout ID '%s'", checkoutID), http.StatusBadRequest) + case ErrCheckoutAlreadyExists: + http.Error(w, fmt.Sprintf("checkout '%s' already exists", checkoutID), http.StatusConflict) + default: + http.Error(w, err.Error(), http.StatusInternalServerError) + } + return + } + + // The checkout directory was created, so if anything fails now, it should be erased. + var checkoutOK bool + defer func() { + if !checkoutOK { + m.EraseCheckout(checkoutID) + } + }() + + responseLines := []string{} + reader := NewDefinitionReader(r.Context(), bodyReader) + for line := range reader.Read() { + blobPath, status := m.fileStore.ResolveFile(line.Checksum, line.FileSize, filestore.ResolveStoredOnly) + if status != filestore.StatusStored { + // Caller should upload this file before we can create the checkout. + responseLines = append(responseLines, fmt.Sprintf("%s %s\n", responseFileUnkown, line.FilePath)) + continue + } + + if err := m.SymlinkToCheckout(blobPath, resolvedCheckoutInfo.absolutePath, line.FilePath); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + } + if reader.Err != nil { + http.Error(w, fmt.Sprintf("ERROR %v\n", reader.Err), http.StatusBadRequest) + return + } + + // If there was any file missing, we should just stop now. + if len(responseLines) > 0 { + http.Error(w, strings.Join(responseLines, ""), http.StatusBadRequest) + return + } + + w.WriteHeader(http.StatusOK) + w.Write([]byte(resolvedCheckoutInfo.RelativePath)) + + checkoutOK = true // Prevent the checkout directory from being erased again. + logger.Info("checkout created") +} diff --git a/pkg/shaman/checkout/routes_test.go b/pkg/shaman/checkout/routes_test.go new file mode 100644 index 00000000..376d2f34 --- /dev/null +++ b/pkg/shaman/checkout/routes_test.go @@ -0,0 +1,125 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package checkout + +import ( + "io/ioutil" + "net/http" + "net/http/httptest" + "os" + "path" + "strings" + "testing" + + "git.blender.org/flamenco/pkg/shaman/filestore" + "git.blender.org/flamenco/pkg/shaman/httpserver" + "github.com/gorilla/mux" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" +) + +func TestReportRequirements(t *testing.T) { + manager, cleanup := createTestManager() + defer cleanup() + + defFile, err := ioutil.ReadFile("definition_test_example.txt") + assert.Nil(t, err) + compressedDefFile := httpserver.CompressBuffer(defFile) + + // 5 files, all ending in newline, so defFileLines has trailing "" element. + defFileLines := strings.Split(string(defFile), "\n") + assert.Equal(t, 6, len(defFileLines), defFileLines) + + respRec := httptest.NewRecorder() + req := httptest.NewRequest("POST", "/checkout/requirement", compressedDefFile) + req.Header.Set("Content-Type", "text/plain") + req.Header.Set("Content-Encoding", "gzip") + manager.reportRequirements(respRec, req) + + bodyBytes, err := ioutil.ReadAll(respRec.Body) + assert.Nil(t, err) + body := string(bodyBytes) + + assert.Equal(t, respRec.Code, http.StatusOK, body) + + // We should not be required to upload the same file twice, + // so another-routes.go should not be in the response. + lines := strings.Split(body, "\n") + expectLines := []string{ + "file-unknown definition.go", + "file-unknown logging.go", + "file-unknown manager.go", + "file-unknown routes.go", + "", + } + assert.EqualValues(t, expectLines, lines) +} + +func TestCreateCheckout(t *testing.T) { + manager, cleanup := createTestManager() + defer cleanup() + + filestore.LinkTestFileStore(manager.fileStore.BasePath()) + + defFile, err := ioutil.ReadFile("../_test_file_store/checkout_definition.txt") + assert.Nil(t, err) + compressedDefFile := httpserver.CompressBuffer(defFile) + + respRec := httptest.NewRecorder() + req := httptest.NewRequest("POST", "/checkout/create/{checkoutID}", compressedDefFile) + req = mux.SetURLVars(req, map[string]string{ + "checkoutID": "jemoeder", + }) + req.Header.Set("Content-Type", "text/plain") + req.Header.Set("Content-Encoding", "gzip") + logrus.SetLevel(logrus.DebugLevel) + manager.createCheckout(respRec, req) + + bodyBytes, err := ioutil.ReadAll(respRec.Body) + assert.Nil(t, err) + body := string(bodyBytes) + assert.Equal(t, http.StatusOK, respRec.Code, body) + + // Check the symlinks of the checkout + coPath := path.Join(manager.checkoutBasePath, "er", "jemoeder") + assert.FileExists(t, path.Join(coPath, "subdir", "replacer.py")) + assert.FileExists(t, path.Join(coPath, "feed.py")) + assert.FileExists(t, path.Join(coPath, "httpstuff.py")) + assert.FileExists(t, path.Join(coPath, "filesystemstuff.py")) + + storePath := manager.fileStore.StoragePath() + assertLinksTo(t, path.Join(coPath, "subdir", "replacer.py"), + path.Join(storePath, "59", "0c148428d5c35fab3ebad2f3365bb469ab9c531b60831f3e826c472027a0b9", "3367.blob")) + assertLinksTo(t, path.Join(coPath, "feed.py"), + path.Join(storePath, "80", "b749c27b2fef7255e7e7b3c2029b03b31299c75ff1f1c72732081c70a713a3", "7488.blob")) + assertLinksTo(t, path.Join(coPath, "httpstuff.py"), + path.Join(storePath, "91", "4853599dd2c351ab7b82b219aae6e527e51518a667f0ff32244b0c94c75688", "486.blob")) + assertLinksTo(t, path.Join(coPath, "filesystemstuff.py"), + path.Join(storePath, "d6", "fc7289b5196cc96748ea72f882a22c39b8833b457fe854ef4c03a01f5db0d3", "7217.blob")) +} + +func assertLinksTo(t *testing.T, linkPath, expectedTarget string) { + actualTarget, err := 
os.Readlink(linkPath) + assert.Nil(t, err) + assert.Equal(t, expectedTarget, actualTarget) +} diff --git a/pkg/shaman/cleanup.go b/pkg/shaman/cleanup.go new file mode 100644 index 00000000..cb35974c --- /dev/null +++ b/pkg/shaman/cleanup.go @@ -0,0 +1,253 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package shaman + +import ( + "os" + "path/filepath" + "time" + + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" +) + +// Mapping from absolute path to the file's mtime. +type mtimeMap map[string]time.Time + +// GCStats contains statistics of a garbage collection run. +type GCStats struct { + numSymlinksChecked int + numOldFiles int + numUnusedOldFiles int + numStillUsedOldFiles int + numFilesDeleted int + numFilesNotDeleted int + bytesDeleted int64 +} + +func (s *Server) periodicCleanup() { + defer packageLogger.Debug("shutting down period cleanup") + defer s.wg.Done() + + for { + s.GCStorage(false) + + select { + case <-s.shutdownChan: + return + case <-time.After(s.config.GarbageCollect.Period): + } + } +} + +func (s *Server) gcAgeThreshold() time.Time { + return time.Now().Add(-s.config.GarbageCollect.MaxAge).Round(1 * time.Second) + +} + +// GCStorage performs garbage collection by deleting files from storage +// that are not symlinked in a checkout and haven't been touched since +// a threshold date. +func (s *Server) GCStorage(doDryRun bool) (stats GCStats) { + ageThreshold := s.gcAgeThreshold() + + logger := log.With(). + Str("checkoutPath", s.config.CheckoutPath). + Str("fileStorePath", s.fileStore.StoragePath()). + Time("ageThreshold", ageThreshold). + Logger() + if doDryRun { + logger = logger.With().Bool("dryRun", doDryRun).Logger() + } + + logger.Info().Msg("performing garbage collection on storage") + + // Scan the storage for all the paths that are older than the threshold. + oldFiles, err := s.gcFindOldFiles(ageThreshold, logger) + if err != nil { + logger.Error().Err(err).Msg("unable to walk file store path to find old files") + return + } + if len(oldFiles) == 0 { + logger.Debug().Msg("found no old files during garbage collection scan") + return + } + + stats.numOldFiles = len(oldFiles) + stats.numFilesNotDeleted = stats.numOldFiles + logger.Info().Int("numOldFiles", stats.numOldFiles). + Msg("found old files, going to check for links") + + // Scan the checkout area and extra checkout paths, and discard any old file that is linked. 
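+	// Anything that is still symlinked from a checkout is removed from oldFiles,
+	// so that only genuinely unused files remain in it.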
+ dirsToCheck := []string{s.config.CheckoutPath} + dirsToCheck = append(dirsToCheck, s.config.GarbageCollect.ExtraCheckoutDirs...) + for _, checkDir := range dirsToCheck { + if err := s.gcFilterLinkedFiles(checkDir, oldFiles, logger, &stats); err != nil { + logger.Error(). + Str("checkoutPath", checkDir). + Err(err). + Msg("unable to walk checkout path to find symlinks") + return + } + } + stats.numStillUsedOldFiles = stats.numOldFiles - len(oldFiles) + stats.numUnusedOldFiles = len(oldFiles) + infoLogger := logger.With(). + Int("numUnusedOldFiles", stats.numUnusedOldFiles). + Int("numStillUsedOldFiles", stats.numStillUsedOldFiles). + Int("numSymlinksChecked", stats.numSymlinksChecked). + Logger() + + if len(oldFiles) == 0 { + infoLogger.Info().Msg("all old files are in use") + return + } + + infoLogger.Info().Msg("found unused old files, going to delete") + + stats.numFilesDeleted, stats.bytesDeleted = s.gcDeleteOldFiles(doDryRun, oldFiles, logger) + stats.numFilesNotDeleted = stats.numOldFiles - stats.numFilesDeleted + + infoLogger.Info(). + Int("numFilesDeleted", stats.numFilesDeleted). + Int("numFilesNotDeleted", stats.numFilesNotDeleted). + Int64("freedBytes", stats.bytesDeleted). + Str("freedSize", humanizeByteSize(stats.bytesDeleted)). + Msg("removed unused old files") + + return +} + +func (s *Server) gcFindOldFiles(ageThreshold time.Time, logger zerolog.Logger) (mtimeMap, error) { + oldFiles := mtimeMap{} + visit := func(path string, info os.FileInfo, err error) error { + select { + case <-s.shutdownChan: + return filepath.SkipDir + default: + } + + if err != nil { + logger.Debug().Err(err).Msg("error while walking file store path to find old files") + return err + } + if info.IsDir() { + return nil + } + modTime := info.ModTime() + isOld := modTime.Before(ageThreshold) + // logger.WithFields(logrus.Fields{ + // "path": path, + // "mtime": info.ModTime(), + // "threshold": ageThreshold, + // "isOld": isOld, + // }).Debug("comparing mtime") + if isOld { + oldFiles[path] = modTime + } + return nil + } + if err := filepath.Walk(s.fileStore.StoragePath(), visit); err != nil { + logger.Error().Err(err).Msg("unable to walk file store path to find old files") + return nil, err + } + + return oldFiles, nil +} + +// gcFilterLinkedFiles removes all still-symlinked paths from 'oldFiles'. +func (s *Server) gcFilterLinkedFiles(checkoutPath string, oldFiles mtimeMap, logger zerolog.Logger, stats *GCStats) error { + logger = logger.With().Str("checkoutPath", checkoutPath).Logger() + + visit := func(path string, info os.FileInfo, err error) error { + select { + case <-s.shutdownChan: + return filepath.SkipDir + default: + } + + if err != nil { + logger.Debug().Err(err).Msg("error while walking checkout path while searching for symlinks") + return err + } + if info.IsDir() || info.Mode()&os.ModeSymlink == 0 { + return nil + } + + if stats != nil { + stats.numSymlinksChecked++ + } + linkTarget, err := filepath.EvalSymlinks(path) + if err != nil { + if os.IsNotExist(err) { + return nil + } + + logger.Warn(). + Str("linkPath", path). + Err(err). + Msg("unable to determine target of symlink; ignoring") + return nil + } + + // Delete the link target from the old files, if it was there at all. 
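+		// The keys of oldFiles are the paths reported by filepath.Walk over the
+		// storage directory, so this delete only has an effect when the symlink
+		// resolves to exactly such a path.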
+ delete(oldFiles, linkTarget) + return nil + } + if err := filepath.Walk(checkoutPath, visit); err != nil { + logger.Error().Err(err).Msg("unable to walk checkout path while searching for symlinks") + return err + } + + return nil +} + +func (s *Server) gcDeleteOldFiles(doDryRun bool, oldFiles mtimeMap, logger zerolog.Logger) (int, int64) { + deletedFiles := 0 + var deletedBytes int64 + for path, lastSeenModTime := range oldFiles { + pathLogger := logger.With().Str("path", path).Logger() + + if stat, err := os.Stat(path); err != nil { + if !os.IsNotExist(err) { + pathLogger.Warn().Err(err).Msg("unable to stat to-be-deleted file") + } + } else if stat.ModTime().After(lastSeenModTime) { + pathLogger.Info().Msg("not deleting recently-touched file") + continue + } else { + deletedBytes += stat.Size() + } + + if doDryRun { + pathLogger.Info().Msg("would delete unused file") + } else { + pathLogger.Info().Msg("deleting unused file") + if err := s.fileStore.RemoveStoredFile(path); err == nil { + deletedFiles++ + } + } + } + + return deletedFiles, deletedBytes +} diff --git a/pkg/shaman/cleanup_test.go b/pkg/shaman/cleanup_test.go new file mode 100644 index 00000000..c1b1efa3 --- /dev/null +++ b/pkg/shaman/cleanup_test.go @@ -0,0 +1,225 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package shaman + +import ( + "os" + "path" + "testing" + "time" + + "git.blender.org/flamenco/pkg/shaman/config" + "git.blender.org/flamenco/pkg/shaman/filestore" + "git.blender.org/flamenco/pkg/shaman/jwtauth" + "github.com/rs/zerolog/log" + "github.com/stretchr/testify/assert" +) + +func createTestShaman() (*Server, func()) { + conf, confCleanup := config.CreateTestConfig() + shaman := NewServer(conf, jwtauth.AlwaysDeny{}) + return shaman, confCleanup +} + +func makeOld(shaman *Server, expectOld mtimeMap, relPath string) { + oldTime := time.Now().Add(-2 * shaman.config.GarbageCollect.MaxAge) + absPath := path.Join(shaman.config.FileStorePath, relPath) + + err := os.Chtimes(absPath, oldTime, oldTime) + if err != nil { + panic(err) + } + + // Do a stat on the file to get the actual on-disk mtime (could be rounded/truncated). 
+ stat, err := os.Stat(absPath) + if err != nil { + panic(err) + } + expectOld[absPath] = stat.ModTime() +} + +func TestGCCanary(t *testing.T) { + server, cleanup := createTestShaman() + defer cleanup() + + assert.True(t, server.config.GarbageCollect.MaxAge > 10*time.Minute, + "config.GarbageCollect.MaxAge must be big enough for this test to be reliable, is %v", + server.config.GarbageCollect.MaxAge) +} + +func TestGCFindOldFiles(t *testing.T) { + server, cleanup := createTestShaman() + defer cleanup() + + filestore.LinkTestFileStore(server.config.FileStorePath) + + // Since all the links have just been created, nothing should be considered old. + ageThreshold := server.gcAgeThreshold() + old, err := server.gcFindOldFiles(ageThreshold, log.With().Str("test", "test").Logger()) + assert.Nil(t, err) + assert.EqualValues(t, mtimeMap{}, old) + + // Make some files old, they should show up in a scan. + expectOld := mtimeMap{} + makeOld(server, expectOld, "stored/59/0c148428d5c35fab3ebad2f3365bb469ab9c531b60831f3e826c472027a0b9/3367.blob") + makeOld(server, expectOld, "stored/80/b749c27b2fef7255e7e7b3c2029b03b31299c75ff1f1c72732081c70a713a3/7488.blob") + makeOld(server, expectOld, "stored/dc/89f15de821ad1df3e78f8ef455e653a2d1862f2eb3f5ee78aa4ca68eb6fb35/781.blob") + + old, err = server.gcFindOldFiles(ageThreshold, log.With().Str("package", "shaman/test").Logger()) + assert.Nil(t, err) + assert.EqualValues(t, expectOld, old) +} + +// Test of the lower-level functions of the garbage collector. +func TestGCComponents(t *testing.T) { + server, cleanup := createTestShaman() + defer cleanup() + + extraCheckoutDir := path.Join(server.config.TestTempDir, "extra-checkout") + server.config.GarbageCollect.ExtraCheckoutDirs = []string{extraCheckoutDir} + + filestore.LinkTestFileStore(server.config.FileStorePath) + + copymap := func(somemap mtimeMap) mtimeMap { + theCopy := mtimeMap{} + for key, value := range somemap { + theCopy[key] = value + } + return theCopy + } + + // Make some files old. + expectOld := mtimeMap{} + makeOld(server, expectOld, "stored/30/928ffced04c7008f3324fded86d133effea50828f5ad896196f2a2e190ac7e/6001.blob") + makeOld(server, expectOld, "stored/59/0c148428d5c35fab3ebad2f3365bb469ab9c531b60831f3e826c472027a0b9/3367.blob") + makeOld(server, expectOld, "stored/80/b749c27b2fef7255e7e7b3c2029b03b31299c75ff1f1c72732081c70a713a3/7488.blob") + makeOld(server, expectOld, "stored/dc/89f15de821ad1df3e78f8ef455e653a2d1862f2eb3f5ee78aa4ca68eb6fb35/781.blob") + + // utility mapping to be able to find absolute paths more easily + absPaths := map[string]string{} + for absPath := range expectOld { + absPaths[path.Base(absPath)] = absPath + } + + // No symlinks created yet, so this should report all the files in oldFiles. + oldFiles := copymap(expectOld) + err := server.gcFilterLinkedFiles(server.config.CheckoutPath, oldFiles, log.With().Str("package", "shaman/test").Logger(), nil) + assert.Nil(t, err) + assert.EqualValues(t, expectOld, oldFiles) + + // Create some symlinks + checkoutInfo, err := server.checkoutMan.PrepareCheckout("checkoutID") + assert.Nil(t, err) + err = server.checkoutMan.SymlinkToCheckout(absPaths["3367.blob"], server.config.CheckoutPath, + path.Join(checkoutInfo.RelativePath, "use-of-3367.blob")) + assert.Nil(t, err) + err = server.checkoutMan.SymlinkToCheckout(absPaths["781.blob"], extraCheckoutDir, + path.Join(checkoutInfo.RelativePath, "use-of-781.blob")) + assert.Nil(t, err) + + // There should only be two old file reported now. 
+ expectRemovable := mtimeMap{ + absPaths["6001.blob"]: expectOld[absPaths["6001.blob"]], + absPaths["7488.blob"]: expectOld[absPaths["7488.blob"]], + } + oldFiles = copymap(expectOld) + stats := GCStats{} + err = server.gcFilterLinkedFiles(server.config.CheckoutPath, oldFiles, log.With().Str("package", "shaman/test").Logger(), &stats) + assert.Equal(t, 1, stats.numSymlinksChecked) // 1 is in checkoutPath, the other in extraCheckoutDir + assert.Nil(t, err) + assert.Equal(t, len(expectRemovable)+1, len(oldFiles)) // one file is linked from the extra checkout dir + err = server.gcFilterLinkedFiles(extraCheckoutDir, oldFiles, log.With().Str("package", "shaman/test").Logger(), &stats) + assert.Equal(t, 2, stats.numSymlinksChecked) // 1 is in checkoutPath, the other in extraCheckoutDir + assert.Nil(t, err) + assert.EqualValues(t, expectRemovable, oldFiles) + + // Touching a file before requesting deletion should not delete it. + now := time.Now() + err = os.Chtimes(absPaths["6001.blob"], now, now) + assert.Nil(t, err) + + // Running the garbage collector should only remove that one unused and untouched file. + assert.FileExists(t, absPaths["6001.blob"], "file should exist before GC") + assert.FileExists(t, absPaths["7488.blob"], "file should exist before GC") + server.gcDeleteOldFiles(true, oldFiles, log.With().Str("package", "shaman/test").Logger()) + assert.FileExists(t, absPaths["6001.blob"], "file should exist after dry-run GC") + assert.FileExists(t, absPaths["7488.blob"], "file should exist after dry-run GC") + + server.gcDeleteOldFiles(false, oldFiles, log.With().Str("package", "shaman/test").Logger()) + + assert.FileExists(t, absPaths["3367.blob"], "file should exist after GC") + assert.FileExists(t, absPaths["6001.blob"], "file should exist after GC") + assert.FileExists(t, absPaths["781.blob"], "file should exist after GC") + _, err = os.Stat(absPaths["7488.blob"]) + assert.True(t, os.IsNotExist(err), "file %s should NOT exist after GC", absPaths["7488.blob"]) +} + +// Test of the high-level GCStorage() function. +func TestGarbageCollect(t *testing.T) { + server, cleanup := createTestShaman() + defer cleanup() + + extraCheckoutDir := path.Join(server.config.TestTempDir, "extra-checkout") + server.config.GarbageCollect.ExtraCheckoutDirs = []string{extraCheckoutDir} + + filestore.LinkTestFileStore(server.config.FileStorePath) + + // Make some files old. 
+ expectOld := mtimeMap{} + makeOld(server, expectOld, "stored/30/928ffced04c7008f3324fded86d133effea50828f5ad896196f2a2e190ac7e/6001.blob") + makeOld(server, expectOld, "stored/59/0c148428d5c35fab3ebad2f3365bb469ab9c531b60831f3e826c472027a0b9/3367.blob") + makeOld(server, expectOld, "stored/80/b749c27b2fef7255e7e7b3c2029b03b31299c75ff1f1c72732081c70a713a3/7488.blob") + makeOld(server, expectOld, "stored/dc/89f15de821ad1df3e78f8ef455e653a2d1862f2eb3f5ee78aa4ca68eb6fb35/781.blob") + + // utility mapping to be able to find absolute paths more easily + absPaths := map[string]string{} + for absPath := range expectOld { + absPaths[path.Base(absPath)] = absPath + } + + // Create some symlinks + checkoutInfo, err := server.checkoutMan.PrepareCheckout("checkoutID") + assert.Nil(t, err) + err = server.checkoutMan.SymlinkToCheckout(absPaths["3367.blob"], server.config.CheckoutPath, + path.Join(checkoutInfo.RelativePath, "use-of-3367.blob")) + assert.Nil(t, err) + err = server.checkoutMan.SymlinkToCheckout(absPaths["781.blob"], extraCheckoutDir, + path.Join(checkoutInfo.RelativePath, "use-of-781.blob")) + assert.Nil(t, err) + + // Running the garbage collector should only remove those two unused files. + assert.FileExists(t, absPaths["6001.blob"], "file should exist before GC") + assert.FileExists(t, absPaths["7488.blob"], "file should exist before GC") + server.GCStorage(true) + assert.FileExists(t, absPaths["6001.blob"], "file should exist after dry-run GC") + assert.FileExists(t, absPaths["7488.blob"], "file should exist after dry-run GC") + server.GCStorage(false) + _, err = os.Stat(absPaths["6001.blob"]) + assert.True(t, os.IsNotExist(err), "file %s should NOT exist after GC", absPaths["6001.blob"]) + _, err = os.Stat(absPaths["7488.blob"]) + assert.True(t, os.IsNotExist(err), "file %s should NOT exist after GC", absPaths["7488.blob"]) + + // Used files should still exist. + assert.FileExists(t, absPaths["781.blob"]) + assert.FileExists(t, absPaths["3367.blob"]) +} diff --git a/pkg/shaman/config/config.go b/pkg/shaman/config/config.go new file mode 100644 index 00000000..c8787669 --- /dev/null +++ b/pkg/shaman/config/config.go @@ -0,0 +1,55 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package config + +import ( + "time" +) + +// Config contains all the Shaman configuration +type Config struct { + // Used only for unit tests, so that they know where the temporary + // directory created for this test is located. + TestTempDir string `yaml:"-"` + + Enabled bool `yaml:"enabled"` + + FileStorePath string `yaml:"fileStorePath"` + CheckoutPath string `yaml:"checkoutPath"` + + GarbageCollect GarbageCollect `yaml:"garbageCollect"` +} + +// GarbageCollect contains the config options for the GC. +type GarbageCollect struct { + // How frequently garbage collection is performed on the file store: + Period time.Duration `yaml:"period"` + // How old files must be before they are GC'd: + MaxAge time.Duration `yaml:"maxAge"` + // Paths to check for symlinks before GC'ing files. + ExtraCheckoutDirs []string `yaml:"extraCheckoutPaths"` + + // Used by the -gc CLI arg to silently disable the garbage collector + // while we're performing a manual sweep. + SilentlyDisable bool `yaml:"-"` +} diff --git a/pkg/shaman/config/testing.go b/pkg/shaman/config/testing.go new file mode 100644 index 00000000..7b362090 --- /dev/null +++ b/pkg/shaman/config/testing.go @@ -0,0 +1,58 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package config + +import ( + "io/ioutil" + "os" + "path" + "time" +) + +// CreateTestConfig creates a configuration + cleanup function. +func CreateTestConfig() (conf Config, cleanup func()) { + tempDir, err := ioutil.TempDir("", "shaman-test-") + if err != nil { + panic(err) + } + + conf = Config{ + TestTempDir: tempDir, + Enabled: true, + FileStorePath: path.Join(tempDir, "file-store"), + CheckoutPath: path.Join(tempDir, "checkout"), + + GarbageCollect: GarbageCollect{ + Period: 8 * time.Hour, + MaxAge: 31 * 24 * time.Hour, + ExtraCheckoutDirs: []string{}, + }, + } + + cleanup = func() { + if err := os.RemoveAll(tempDir); err != nil { + panic(err) + } + } + return +} diff --git a/pkg/shaman/fileserver/checkfile.go b/pkg/shaman/fileserver/checkfile.go new file mode 100644 index 00000000..9baf13d4 --- /dev/null +++ b/pkg/shaman/fileserver/checkfile.go @@ -0,0 +1,46 @@ +/* (c) 2019, Blender Foundation - Sybren A. 
Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package fileserver + +import ( + "context" + "net/http" + + "git.blender.org/flamenco/pkg/shaman/filestore" +) + +var responseForStatus = map[filestore.FileStatus]int{ + filestore.StatusUploading: 420, // Enhance Your Calm + filestore.StatusStored: http.StatusOK, + filestore.StatusDoesNotExist: http.StatusNotFound, +} + +func (fs *FileServer) checkFile(ctx context.Context, w http.ResponseWriter, checksum string, filesize int64) { + _, status := fs.fileStore.ResolveFile(checksum, filesize, filestore.ResolveEverything) + code, ok := responseForStatus[status] + if !ok { + packageLogger.WithField("fileStoreStatus", status).Error("no HTTP status code implemented") + code = http.StatusInternalServerError + } + w.WriteHeader(code) +} diff --git a/pkg/shaman/fileserver/fileserver.go b/pkg/shaman/fileserver/fileserver.go new file mode 100644 index 00000000..ad2d9887 --- /dev/null +++ b/pkg/shaman/fileserver/fileserver.go @@ -0,0 +1,73 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package fileserver + +import ( + "context" + "sync" + + "git.blender.org/flamenco/pkg/shaman/filestore" +) + +type receiverChannel chan struct{} + +// FileServer deals with receiving and serving of uploaded files. 
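+// The receiverChannels map tracks uploads that are currently in progress, so
+// that a running upload can be aborted when another client stores the same
+// file first (see receivefile.go and receivelistener.go).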
+type FileServer struct { + fileStore filestore.Storage + + receiverMutex sync.Mutex + receiverChannels map[string]receiverChannel + + ctx context.Context + ctxCancel context.CancelFunc + wg sync.WaitGroup +} + +// New creates a new File Server and starts a monitoring goroutine. +func New(fileStore filestore.Storage) *FileServer { + ctx, ctxCancel := context.WithCancel(context.Background()) + + fs := &FileServer{ + fileStore, + sync.Mutex{}, + map[string]receiverChannel{}, + ctx, + ctxCancel, + sync.WaitGroup{}, + } + + return fs +} + +// Go starts goroutines for background operations. +// After Go() has been called, use Close() to stop those goroutines. +func (fs *FileServer) Go() { + fs.wg.Add(1) + go fs.receiveListenerPeriodicCheck() +} + +// Close stops any goroutines started by this server, and waits for them to close. +func (fs *FileServer) Close() { + fs.ctxCancel() + fs.wg.Wait() +} diff --git a/pkg/shaman/fileserver/logging.go b/pkg/shaman/fileserver/logging.go new file mode 100644 index 00000000..ed34ba93 --- /dev/null +++ b/pkg/shaman/fileserver/logging.go @@ -0,0 +1,29 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package fileserver + +import ( + "github.com/sirupsen/logrus" +) + +var packageLogger = logrus.WithField("package", "shaman/receiver") diff --git a/pkg/shaman/fileserver/receivefile.go b/pkg/shaman/fileserver/receivefile.go new file mode 100644 index 00000000..15b77053 --- /dev/null +++ b/pkg/shaman/fileserver/receivefile.go @@ -0,0 +1,176 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package fileserver
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+
+	"git.blender.org/flamenco/pkg/shaman/filestore"
+	"git.blender.org/flamenco/pkg/shaman/hasher"
+	"git.blender.org/flamenco/pkg/shaman/httpserver"
+	"git.blender.org/flamenco/pkg/shaman/jwtauth"
+	"github.com/sirupsen/logrus"
+)
+
+// receiveFile streams a file from an HTTP request to disk.
+func (fs *FileServer) receiveFile(ctx context.Context, w http.ResponseWriter, r *http.Request, checksum string, filesize int64) {
+	logger := packageLogger.WithFields(jwtauth.RequestLogFields(r))
+
+	bodyReader, err := httpserver.DecompressedReader(r)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+	defer bodyReader.Close()
+
+	originalFilename := r.Header.Get("X-Shaman-Original-Filename")
+	if originalFilename == "" {
+		originalFilename = "-not specified-"
+	}
+	logger = logger.WithField("originalFilename", originalFilename)
+
+	localPath, status := fs.fileStore.ResolveFile(checksum, filesize, filestore.ResolveEverything)
+	logger = logger.WithField("path", localPath)
+	if status == filestore.StatusStored {
+		logger.Info("uploaded file already exists")
+		w.Header().Set("Location", r.RequestURI)
+		http.Error(w, "File already stored", http.StatusAlreadyReported)
+		return
+	}
+
+	if status == filestore.StatusUploading && r.Header.Get("X-Shaman-Can-Defer-Upload") == "true" {
+		logger.Info("someone is uploading this file and client can defer")
+		http.Error(w, "File being uploaded, please defer", http.StatusAlreadyReported)
+		return
+	}
+	logger.Info("receiving file")
+
+	streamTo, err := fs.fileStore.OpenForUpload(checksum, filesize)
+	if err != nil {
+		logger.WithError(err).Error("unable to open file for writing uploaded data")
+		http.Error(w, "Unable to open file", http.StatusInternalServerError)
+		return
+	}
+
+	// Clean up the temporary file if it still exists at function exit.
+	defer func() {
+		streamTo.Close()
+		fs.fileStore.RemoveUploadedFile(streamTo.Name())
+	}()
+
+	// Abort this upload when the file has been finished by someone else.
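+	// uploadDone is closed when this handler returns. receiverChannel receives
+	// a signal when someone else stores the same file (see receivelistener.go);
+	// in that case the goroutine below closes the request body to abort this
+	// upload.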
+	uploadDone := make(chan struct{})
+	uploadAlreadyCompleted := false
+	defer close(uploadDone)
+	receiverChannel := fs.receiveListenerFor(checksum, filesize)
+	go func() {
+		select {
+		case <-receiverChannel:
+		case <-uploadDone:
+			close(receiverChannel)
+			return
+		}
+
+		logger := logger.WithField("path", localPath)
+		logger.Info("file was completed during someone else's upload")
+
+		uploadAlreadyCompleted = true
+		err := r.Body.Close()
+		if err != nil {
+			logger.WithError(err).Warning("error closing connection")
+		}
+	}()
+
+	written, actualChecksum, err := hasher.Copy(streamTo, bodyReader)
+	if err != nil {
+		if closeErr := streamTo.Close(); closeErr != nil {
+			logger.WithFields(logrus.Fields{
+				logrus.ErrorKey: err,
+				"closeError":    closeErr,
+			}).Error("error closing local file after other I/O error occurred")
+		}
+
+		logger = logger.WithError(err)
+		if uploadAlreadyCompleted {
+			logger.Debug("aborted upload")
+			w.Header().Set("Location", r.RequestURI)
+			http.Error(w, "File already stored", http.StatusAlreadyReported)
+		} else if err == io.ErrUnexpectedEOF {
+			logger.Info("unexpected EOF, client probably just disconnected")
+		} else {
+			logger.Warning("unable to copy request body to file")
+			http.Error(w, "I/O error", http.StatusInternalServerError)
+		}
+		return
+	}
+
+	if err := streamTo.Close(); err != nil {
+		logger.WithError(err).Warning("error closing local file")
+		http.Error(w, "I/O error", http.StatusInternalServerError)
+		return
+	}
+
+	if written != filesize {
+		logger.WithFields(logrus.Fields{
+			"declaredSize": filesize,
+			"actualSize":   written,
+		}).Warning("mismatch between expected and actual size")
+		http.Error(w,
+			fmt.Sprintf("Received %d bytes but you promised %d", written, filesize),
+			http.StatusExpectationFailed)
+		return
+	}
+
+	if actualChecksum != checksum {
+		logger.WithFields(logrus.Fields{
+			"declaredChecksum": checksum,
+			"actualChecksum":   actualChecksum,
+		}).Warning("mismatch between expected and actual checksum")
+		http.Error(w,
+			"Declared and actual checksums differ",
+			http.StatusExpectationFailed)
+		return
+	}
+
+	logger.WithFields(logrus.Fields{
+		"receivedBytes": written,
+		"checksum":      actualChecksum,
+		"tempFile":      streamTo.Name(),
+	}).Debug("File received")
+
+	if err := fs.fileStore.MoveToStored(checksum, filesize, streamTo.Name()); err != nil {
+		logger.WithFields(logrus.Fields{
+			"tempFile":      streamTo.Name(),
+			logrus.ErrorKey: err,
+		}).Error("unable to move file from 'upload' to 'stored' storage")
+		http.Error(w,
+			"unable to move file from 'upload' to 'stored' storage",
+			http.StatusInternalServerError)
+		return
+	}
+
+	http.Error(w, "", http.StatusNoContent)
+}
diff --git a/pkg/shaman/fileserver/receivefile_test.go b/pkg/shaman/fileserver/receivefile_test.go
new file mode 100644
index 00000000..e61a35fd
--- /dev/null
+++ b/pkg/shaman/fileserver/receivefile_test.go
@@ -0,0 +1,87 @@
+/* (c) 2019, Blender Foundation - Sybren A. Stüvel
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package fileserver + +import ( + "io/ioutil" + "net/http" + "net/http/httptest" + "strconv" + "testing" + + "git.blender.org/flamenco/pkg/shaman/hasher" + "git.blender.org/flamenco/pkg/shaman/httpserver" + + "git.blender.org/flamenco/pkg/shaman/filestore" + "github.com/gorilla/mux" + "github.com/stretchr/testify/assert" +) + +func TestStoreFile(t *testing.T) { + server, cleanup := createTestServer() + defer cleanup() + + payload := []byte("hähähä") + // Just to double-check it's encoded as UTF-8: + assert.EqualValues(t, []byte("h\xc3\xa4h\xc3\xa4h\xc3\xa4"), payload) + + filesize := int64(len(payload)) + + testWithChecksum := func(checksum string) *httptest.ResponseRecorder { + compressedPayload := httpserver.CompressBuffer(payload) + respRec := httptest.NewRecorder() + req := httptest.NewRequest("POST", "/files/{checksum}/{filesize}", compressedPayload) + req = mux.SetURLVars(req, map[string]string{ + "checksum": checksum, + "filesize": strconv.FormatInt(filesize, 10), + }) + req.Header.Set("Content-Encoding", "gzip") + req.Header.Set("X-Shaman-Original-Filename", "in-memory-file.txt") + server.ServeHTTP(respRec, req) + return respRec + } + + var respRec *httptest.ResponseRecorder + var path string + var status filestore.FileStatus + + // A bad checksum should be rejected. + badChecksum := "da-checksum-is-long-enough-like-this" + respRec = testWithChecksum(badChecksum) + assert.Equal(t, http.StatusExpectationFailed, respRec.Code) + path, status = server.fileStore.ResolveFile(badChecksum, filesize, filestore.ResolveEverything) + assert.Equal(t, filestore.StatusDoesNotExist, status) + assert.Equal(t, "", path) + + // The correct checksum should be accepted. + correctChecksum := hasher.Checksum(payload) + respRec = testWithChecksum(correctChecksum) + assert.Equal(t, http.StatusNoContent, respRec.Code) + path, status = server.fileStore.ResolveFile(correctChecksum, filesize, filestore.ResolveEverything) + assert.Equal(t, filestore.StatusStored, status) + assert.FileExists(t, path) + + savedContent, err := ioutil.ReadFile(path) + assert.Nil(t, err) + assert.EqualValues(t, payload, savedContent, "The file should be saved uncompressed") +} diff --git a/pkg/shaman/fileserver/receivelistener.go b/pkg/shaman/fileserver/receivelistener.go new file mode 100644 index 00000000..cd553ab4 --- /dev/null +++ b/pkg/shaman/fileserver/receivelistener.go @@ -0,0 +1,88 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package fileserver + +import ( + "fmt" + "time" +) + +// Returns a channel that is open while the given file is being received. +// The first to fully receive the file should close the channel, indicating to others +// that their upload can be aborted. +func (fs *FileServer) receiveListenerFor(checksum string, filesize int64) chan struct{} { + fs.receiverMutex.Lock() + defer fs.receiverMutex.Unlock() + + key := fmt.Sprintf("%s/%d", checksum, filesize) + channel := fs.receiverChannels[key] + if channel != nil { + return channel + } + + channel = make(receiverChannel) + fs.receiverChannels[key] = channel + + go func() { + // Wait until the channel closes. + select { + case <-channel: + } + + fs.receiverMutex.Lock() + defer fs.receiverMutex.Unlock() + delete(fs.receiverChannels, key) + }() + + return channel +} + +func (fs *FileServer) receiveListenerPeriodicCheck() { + defer fs.wg.Done() + lastReportedChans := -1 + + doCheck := func() { + fs.receiverMutex.Lock() + defer fs.receiverMutex.Unlock() + + numChans := len(fs.receiverChannels) + if numChans == 0 { + if lastReportedChans != 0 { + packageLogger.Debug("no receive listener channels") + } + } else { + packageLogger.WithField("num_receiver_channels", numChans).Debug("receiving files") + } + lastReportedChans = numChans + } + + for { + select { + case <-fs.ctx.Done(): + packageLogger.Debug("stopping receive listener periodic check") + return + case <-time.After(1 * time.Minute): + doCheck() + } + } +} diff --git a/pkg/shaman/fileserver/routes.go b/pkg/shaman/fileserver/routes.go new file mode 100644 index 00000000..3b5c0a16 --- /dev/null +++ b/pkg/shaman/fileserver/routes.go @@ -0,0 +1,97 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package fileserver + +import ( + "errors" + "fmt" + "net/http" + "strconv" + + "git.blender.org/flamenco/pkg/shaman/jwtauth" + "github.com/gorilla/mux" + "github.com/sirupsen/logrus" +) + +// AddRoutes adds this package's routes to the Router. +func (fs *FileServer) AddRoutes(router *mux.Router, auther jwtauth.Authenticator) { + router.Handle("/files/{checksum}/{filesize}", auther.Wrap(fs)).Methods("GET", "POST", "OPTIONS") +} + +func (fs *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request) { + logger := packageLogger.WithFields(jwtauth.RequestLogFields(r)) + + checksum, filesize, err := parseRequestVars(w, r) + if err != nil { + logger.WithError(err).Warning("invalid request") + return + } + + logger = logger.WithFields(logrus.Fields{ + "checksum": checksum, + "filesize": filesize, + }) + + switch r.Method { + case http.MethodOptions: + logger.Info("checking file") + fs.checkFile(r.Context(), w, checksum, filesize) + case http.MethodGet: + // TODO: make optional or just delete: + logger.Info("serving file") + fs.serveFile(r.Context(), w, checksum, filesize) + case http.MethodPost: + fs.receiveFile(r.Context(), w, r, checksum, filesize) + default: + // This should never be reached due to the router options, but just in case. + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + } +} + +func parseRequestVars(w http.ResponseWriter, r *http.Request) (string, int64, error) { + vars := mux.Vars(r) + checksum, ok := vars["checksum"] + if !ok { + http.Error(w, "missing checksum", http.StatusBadRequest) + return "", 0, errors.New("missing checksum") + } + // Arbitrary minimum length, but we can fairly safely assume that all + // hashing methods used produce a hash of at least 32 characters. + if len(checksum) < 32 { + http.Error(w, "checksum suspiciously short", http.StatusBadRequest) + return "", 0, errors.New("checksum suspiciously short") + } + + filesizeStr, ok := vars["filesize"] + if !ok { + http.Error(w, "missing filesize", http.StatusBadRequest) + return "", 0, errors.New("missing filesize") + } + filesize, err := strconv.ParseInt(filesizeStr, 10, 64) + if err != nil { + http.Error(w, "invalid filesize", http.StatusBadRequest) + return "", 0, fmt.Errorf("invalid filesize: %v", err) + } + + return checksum, filesize, nil +} diff --git a/pkg/shaman/fileserver/servefile.go b/pkg/shaman/fileserver/servefile.go new file mode 100644 index 00000000..a2bdaa13 --- /dev/null +++ b/pkg/shaman/fileserver/servefile.go @@ -0,0 +1,83 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package fileserver
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strconv"
+
+	"github.com/sirupsen/logrus"
+
+	"git.blender.org/flamenco/pkg/shaman/filestore"
+)
+
+// serveFile only serves stored files (not 'uploading' or 'checking')
+func (fs *FileServer) serveFile(ctx context.Context, w http.ResponseWriter, checksum string, filesize int64) {
+	path, status := fs.fileStore.ResolveFile(checksum, filesize, filestore.ResolveStoredOnly)
+	if status != filestore.StatusStored {
+		http.Error(w, "File Not Found", http.StatusNotFound)
+		return
+	}
+
+	logger := packageLogger.WithField("path", path)
+
+	stat, err := os.Stat(path)
+	if err != nil {
+		logger.WithError(err).Error("unable to stat file")
+		http.Error(w, "File Not Found", http.StatusNotFound)
+		return
+	}
+	if stat.Size() != filesize {
+		logger.WithFields(logrus.Fields{
+			"realSize":     stat.Size(),
+			"expectedSize": filesize,
+		}).Error("file size in storage is corrupt")
+		http.Error(w, "File Size Incorrect", http.StatusInternalServerError)
+		return
+	}
+
+	infile, err := os.Open(path)
+	if err != nil {
+		logger.WithError(err).Error("unable to read file")
+		http.Error(w, "File Not Found", http.StatusNotFound)
+		return
+	}
+
+	filesizeStr := strconv.FormatInt(filesize, 10)
+	w.Header().Set("Content-Type", "application/binary")
+	w.Header().Set("Content-Length", filesizeStr)
+	w.Header().Set("ETag", fmt.Sprintf("'%s-%s'", checksum, filesizeStr))
+	w.Header().Set("X-Shaman-Checksum", checksum)
+
+	written, err := io.Copy(w, infile)
+	if err != nil {
+		logger.WithError(err).Error("unable to copy file to writer")
+		// Anything could have been sent by now, so just close the connection.
+		return
+	}
+	logger.WithField("written", written).Debug("file sent to writer")
+}
diff --git a/pkg/shaman/fileserver/servefile_test.go b/pkg/shaman/fileserver/servefile_test.go
new file mode 100644
index 00000000..9a815cca
--- /dev/null
+++ b/pkg/shaman/fileserver/servefile_test.go
@@ -0,0 +1,71 @@
+/* (c) 2019, Blender Foundation - Sybren A. Stüvel
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ + +package fileserver + +import ( + "net/http" + "net/http/httptest" + "strconv" + "testing" + + "git.blender.org/flamenco/pkg/shaman/config" + "git.blender.org/flamenco/pkg/shaman/filestore" + "github.com/gorilla/mux" + "github.com/stretchr/testify/assert" +) + +func createTestServer() (server *FileServer, cleanup func()) { + config, configCleanup := config.CreateTestConfig() + + store := filestore.New(config) + server = New(store) + server.Go() + + cleanup = func() { + server.Close() + configCleanup() + } + return +} + +func TestServeFile(t *testing.T) { + server, cleanup := createTestServer() + defer cleanup() + + payload := []byte("hähähä") + checksum := "da-checksum-is-long-enough-like-this" + filesize := int64(len(payload)) + + server.fileStore.(*filestore.Store).MustStoreFileForTest(checksum, filesize, payload) + + respRec := httptest.NewRecorder() + req := httptest.NewRequest("GET", "/files/{checksum}/{filesize}", nil) + req = mux.SetURLVars(req, map[string]string{ + "checksum": checksum, + "filesize": strconv.FormatInt(filesize, 10), + }) + server.ServeHTTP(respRec, req) + + assert.Equal(t, http.StatusOK, respRec.Code) + assert.EqualValues(t, payload, respRec.Body.Bytes()) +} diff --git a/pkg/shaman/filestore/filestore.go b/pkg/shaman/filestore/filestore.go new file mode 100644 index 00000000..86745b5d --- /dev/null +++ b/pkg/shaman/filestore/filestore.go @@ -0,0 +1,196 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package filestore + +import ( + "os" + "path" + "strconv" + + "git.blender.org/flamenco/pkg/shaman/config" + "github.com/sirupsen/logrus" +) + +// Store represents the default Shaman file store. +type Store struct { + baseDir string + + uploading storageBin + stored storageBin +} + +// New returns a new file store. +func New(conf config.Config) Storage { + packageLogger.WithField("storageDir", conf.FileStorePath).Info("opening file store") + store := &Store{ + conf.FileStorePath, + storageBin{conf.FileStorePath, "uploading", true, ".tmp"}, + storageBin{conf.FileStorePath, "stored", false, ".blob"}, + } + store.createDirectoryStructure() + return store +} + +// Create the base directory structure for this store. 
+func (s *Store) createDirectoryStructure() { + mkdir := func(subdir string) { + path := path.Join(s.baseDir, subdir) + logger := packageLogger.WithField("path", path) + stat, err := os.Stat(path) + + if err == nil { + if stat.IsDir() { + // Exists and is a directory; nothing to do. + return + } + logger.Fatal("path exists but is not a directory") + } + + if !os.IsNotExist(err) { + logger.WithError(err).Fatal("unable to stat directory") + } + + logger.Debug("creating directory") + if err := os.MkdirAll(path, 0777); err != nil { + logger.WithError(err).Fatal("unable to create directory") + } + } + + mkdir(s.uploading.dirName) + mkdir(s.stored.dirName) +} + +// StoragePath returns the directory path of the 'stored' storage bin. +func (s *Store) StoragePath() string { + return path.Join(s.stored.basePath, s.stored.dirName) +} + +// BasePath returns the directory path of the storage. +func (s *Store) BasePath() string { + return s.baseDir +} + +// Returns the checksum/filesize dependent parts of the file's path. +// To be combined with a base directory, status directory, and status-dependent suffix. +func (s *Store) partialFilePath(checksum string, filesize int64) string { + return path.Join(checksum[0:2], checksum[2:], strconv.FormatInt(filesize, 10)) +} + +// ResolveFile checks the status of the file in the store. +func (s *Store) ResolveFile(checksum string, filesize int64, storedOnly StoredOnly) (path string, status FileStatus) { + partial := s.partialFilePath(checksum, filesize) + + logger := packageLogger.WithFields(logrus.Fields{ + "checksum": checksum, + "filesize": filesize, + "partialPath": partial, + "storagePath": s.baseDir, + }) + + if path = s.stored.resolve(partial); path != "" { + // logger.WithField("path", path).Debug("found stored file") + return path, StatusStored + } + if storedOnly != ResolveEverything { + // logger.Debug("file does not exist in 'stored' state") + return "", StatusDoesNotExist + } + + if path = s.uploading.resolve(partial); path != "" { + logger.WithField("path", path).Debug("found currently uploading file") + return path, StatusUploading + } + // logger.Debug("file does not exist") + return "", StatusDoesNotExist +} + +// OpenForUpload returns a file pointer suitable to stream an uploaded file to. +func (s *Store) OpenForUpload(checksum string, filesize int64) (*os.File, error) { + partial := s.partialFilePath(checksum, filesize) + return s.uploading.openForWriting(partial) +} + +// MoveToStored moves a file from 'uploading' to 'stored' storage. +// It is assumed that the checksum and filesize have been verified. +func (s *Store) MoveToStored(checksum string, filesize int64, uploadedFilePath string) error { + // Check that the uploaded file path is actually in the 'uploading' storage. + partial := s.partialFilePath(checksum, filesize) + if !s.uploading.contains(partial, uploadedFilePath) { + return ErrNotInUploading + } + + // Move to the other storage bin. 
+ targetPath := s.stored.pathFor(partial) + targetDir, _ := path.Split(targetPath) + if err := os.MkdirAll(targetDir, 0777); err != nil { + return err + } + logger := packageLogger.WithFields(logrus.Fields{ + "uploadedPath": uploadedFilePath, + "storagePath": targetPath, + }) + logger.Debug("moving uploaded file to storage") + if err := os.Rename(uploadedFilePath, targetPath); err != nil { + return err + } + + s.RemoveUploadedFile(uploadedFilePath) + return nil +} + +func (s *Store) removeFile(filePath string) error { + err := os.Remove(filePath) + if err != nil { + packageLogger.WithError(err).Debug("unable to delete file; ignoring") + } + + // Clean up directory structure, but ignore any errors (dirs may not be empty) + directory := path.Dir(filePath) + os.Remove(directory) + os.Remove(path.Dir(directory)) + + return err +} + +// RemoveUploadedFile removes a file from the 'uploading' storage bin. +// Errors are ignored. +func (s *Store) RemoveUploadedFile(filePath string) { + // Check that the file path is actually in the 'uploading' storage. + if !s.uploading.contains("", filePath) { + packageLogger.WithField("file", filePath).Error( + "filestore.Store.RemoveUploadedFile called with file not in 'uploading' storage bin") + return + } + s.removeFile(filePath) +} + +// RemoveStoredFile removes a file from the 'stored' storage bin. +func (s *Store) RemoveStoredFile(filePath string) error { + // Check that the file path is actually in the 'stored' storage. + if !s.stored.contains("", filePath) { + packageLogger.WithField("file", filePath).Error( + "filestore.Store.RemoveStoredFile called with file not in 'stored' storage bin") + return os.ErrNotExist + } + return s.removeFile(filePath) +} diff --git a/pkg/shaman/filestore/filestore_test.go b/pkg/shaman/filestore/filestore_test.go new file mode 100644 index 00000000..41243170 --- /dev/null +++ b/pkg/shaman/filestore/filestore_test.go @@ -0,0 +1,155 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package filestore + +import ( + "io/ioutil" + "os" + "path" + "testing" + + "github.com/stretchr/testify/assert" +) + +// mustCreateFile creates an empty file. +// The containing directory structure is created as well, if necessary. 
+func mustCreateFile(filepath string) { + err := os.MkdirAll(path.Dir(filepath), 0777) + if err != nil { + panic(err) + } + + file, err := os.Create(filepath) + if err != nil { + panic(err) + } + file.Close() +} + +func TestCreateDirectories(t *testing.T) { + store := CreateTestStore() + defer CleanupTestStore(store) + + assert.Equal(t, path.Join(store.baseDir, "uploading", "x"), store.uploading.storagePrefix("x")) + assert.Equal(t, path.Join(store.baseDir, "stored", "x"), store.stored.storagePrefix("x")) + + assert.DirExists(t, path.Join(store.baseDir, "uploading")) + assert.DirExists(t, path.Join(store.baseDir, "stored")) +} + +func TestResolveStoredFile(t *testing.T) { + store := CreateTestStore() + defer CleanupTestStore(store) + + foundPath, status := store.ResolveFile("abcdefxxx", 123, ResolveStoredOnly) + assert.Equal(t, "", foundPath) + assert.Equal(t, StatusDoesNotExist, status) + + fname := path.Join(store.baseDir, "stored", "ab", "cdefxxx", "123.blob") + mustCreateFile(fname) + + foundPath, status = store.ResolveFile("abcdefxxx", 123, ResolveStoredOnly) + assert.Equal(t, fname, foundPath) + assert.Equal(t, StatusStored, status) + + foundPath, status = store.ResolveFile("abcdefxxx", 123, ResolveEverything) + assert.Equal(t, fname, foundPath) + assert.Equal(t, StatusStored, status) +} + +func TestResolveUploadingFile(t *testing.T) { + store := CreateTestStore() + defer CleanupTestStore(store) + + foundPath, status := store.ResolveFile("abcdefxxx", 123, ResolveEverything) + assert.Equal(t, "", foundPath) + assert.Equal(t, StatusDoesNotExist, status) + + fname := path.Join(store.baseDir, "uploading", "ab", "cdefxxx", "123-unique-code.tmp") + mustCreateFile(fname) + + foundPath, status = store.ResolveFile("abcdefxxx", 123, ResolveStoredOnly) + assert.Equal(t, "", foundPath) + assert.Equal(t, StatusDoesNotExist, status) + + foundPath, status = store.ResolveFile("abcdefxxx", 123, ResolveEverything) + assert.Equal(t, fname, foundPath) + assert.Equal(t, StatusUploading, status) +} + +func TestOpenForUpload(t *testing.T) { + store := CreateTestStore() + defer CleanupTestStore(store) + + contents := []byte("je moešje") + fileSize := int64(len(contents)) + + file, err := store.OpenForUpload("abcdefxxx", fileSize) + assert.Nil(t, err) + file.Write(contents) + file.Close() + + foundPath, status := store.ResolveFile("abcdefxxx", fileSize, ResolveEverything) + assert.Equal(t, file.Name(), foundPath) + assert.Equal(t, StatusUploading, status) + + readContents, err := ioutil.ReadFile(foundPath) + assert.Nil(t, err) + assert.EqualValues(t, contents, readContents) +} + +func TestMoveToStored(t *testing.T) { + store := CreateTestStore() + defer CleanupTestStore(store) + + contents := []byte("je moešje") + fileSize := int64(len(contents)) + + err := store.MoveToStored("abcdefxxx", fileSize, "/just/some/path") + assert.NotNil(t, err) + + file, err := store.OpenForUpload("abcdefxxx", fileSize) + assert.Nil(t, err) + file.Write(contents) + file.Close() + tempLocation := file.Name() + + err = store.MoveToStored("abcdefxxx", fileSize, file.Name()) + assert.Nil(t, err) + + foundPath, status := store.ResolveFile("abcdefxxx", fileSize, ResolveEverything) + assert.NotEqual(t, file.Name(), foundPath) + assert.Equal(t, StatusStored, status) + + assert.FileExists(t, foundPath) + + // The entire directory structure should be kept clean. 
+ assertDoesNotExist(t, tempLocation) + assertDoesNotExist(t, path.Dir(tempLocation)) + assertDoesNotExist(t, path.Dir(path.Dir(tempLocation))) +} + +func assertDoesNotExist(t *testing.T, path string) { + _, err := os.Stat(path) + assert.True(t, os.IsNotExist(err), "%s should not exist, err=%v", path, err) +} diff --git a/pkg/shaman/filestore/interface.go b/pkg/shaman/filestore/interface.go new file mode 100644 index 00000000..9643c5d7 --- /dev/null +++ b/pkg/shaman/filestore/interface.go @@ -0,0 +1,81 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package filestore + +import ( + "errors" + "os" +) + +// Storage is the interface for Shaman file stores. +type Storage interface { + // ResolveFile checks the status of the file in the store and returns the actual path. + ResolveFile(checksum string, filesize int64, storedOnly StoredOnly) (string, FileStatus) + + // OpenForUpload returns a file pointer suitable to stream an uploaded file to. + OpenForUpload(checksum string, filesize int64) (*os.File, error) + + // BasePath returns the directory path of the storage. + // This is the directory containing the 'stored' and 'uploading' directories. + BasePath() string + + // StoragePath returns the directory path of the 'stored' storage bin. + StoragePath() string + + // MoveToStored moves a file from 'uploading' storage to the actual 'stored' storage. + MoveToStored(checksum string, filesize int64, uploadedFilePath string) error + + // RemoveUploadedFile removes a file from the 'uploading' storage. + // This is intended to clean up files for which upload was aborted for some reason. + RemoveUploadedFile(filePath string) + + // RemoveStoredFile removes a file from the 'stored' storage bin. + // This is intended to garbage collect old, unused files. + RemoveStoredFile(filePath string) error +} + +// FileStatus represents the status of a file in the store. +type FileStatus int + +// Valid statuses for files in the store. +const ( + StatusNotSet FileStatus = iota + StatusDoesNotExist + StatusUploading + StatusStored +) + +// StoredOnly indicates whether to resolve only 'stored' files or also 'uploading' or 'checking'. +type StoredOnly bool + +// For the ResolveFile() call. This is more explicit than just true/false values. 
+const ( + ResolveStoredOnly StoredOnly = true + ResolveEverything StoredOnly = false +) + +// Predefined errors +var ( + ErrFileDoesNotExist = errors.New("file does not exist") + ErrNotInUploading = errors.New("file not stored in 'uploading' storage") +) diff --git a/pkg/shaman/filestore/logging.go b/pkg/shaman/filestore/logging.go new file mode 100644 index 00000000..bbbd1b7e --- /dev/null +++ b/pkg/shaman/filestore/logging.go @@ -0,0 +1,29 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package filestore + +import ( + "github.com/sirupsen/logrus" +) + +var packageLogger = logrus.WithField("package", "shaman/filestore") diff --git a/pkg/shaman/filestore/substore.go b/pkg/shaman/filestore/substore.go new file mode 100644 index 00000000..f47bafb2 --- /dev/null +++ b/pkg/shaman/filestore/substore.go @@ -0,0 +1,107 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package filestore + +import ( + "errors" + "os" + "path" + "path/filepath" +) + +type storageBin struct { + basePath string + dirName string + hasTempSuffix bool + fileSuffix string +} + +var ( + errNoWriteAllowed = errors.New("writing is only allowed in storage bins with a temp suffix") +) + +func (s *storageBin) storagePrefix(partialPath string) string { + return path.Join(s.basePath, s.dirName, partialPath) +} + +// Returns whether 'someFullPath' is pointing to a path inside our storage for the given partial path. +// Only looks at the paths, does not perform any filesystem checks to see the file is actually there. +func (s *storageBin) contains(partialPath, someFullPath string) bool { + expectedPrefix := s.storagePrefix(partialPath) + return len(expectedPrefix) < len(someFullPath) && expectedPrefix == someFullPath[:len(expectedPrefix)] +} + +// pathOrGlob returns either a path, or a glob when hasTempSuffix=true. +func (s *storageBin) pathOrGlob(partialPath string) string { + pathOrGlob := s.storagePrefix(partialPath) + if s.hasTempSuffix { + pathOrGlob += "-*" + } + pathOrGlob += s.fileSuffix + return pathOrGlob +} + +// resolve finds a file '{basePath}/{dirName}/partialPath*{fileSuffix}' +// and returns its path. The * glob pattern is only used when +// hasTempSuffix is true. +func (s *storageBin) resolve(partialPath string) string { + pathOrGlob := s.pathOrGlob(partialPath) + + if !s.hasTempSuffix { + _, err := os.Stat(pathOrGlob) + if err != nil { + return "" + } + return pathOrGlob + } + + matches, _ := filepath.Glob(pathOrGlob) + if len(matches) == 0 { + return "" + } + return matches[0] +} + +// pathFor(somePath) returns that path inside the storage bin, including proper suffix. +// Note that this is only valid for bins without temp suffixes. +func (s *storageBin) pathFor(partialPath string) string { + return s.storagePrefix(partialPath) + s.fileSuffix +} + +// openForWriting makes sure there is a place to write to. +func (s *storageBin) openForWriting(partialPath string) (*os.File, error) { + if !s.hasTempSuffix { + return nil, errNoWriteAllowed + } + + pathOrGlob := s.pathOrGlob(partialPath) + dirname, filename := path.Split(pathOrGlob) + + if err := os.MkdirAll(dirname, 0777); err != nil { + return nil, err + } + + // This creates the file with 0666 permissions (before umask). + // Note that this is our own TempFile() and not ioutils.TempFile(). + return TempFile(dirname, filename) +} diff --git a/pkg/shaman/filestore/substore_test.go b/pkg/shaman/filestore/substore_test.go new file mode 100644 index 00000000..d79c883b --- /dev/null +++ b/pkg/shaman/filestore/substore_test.go @@ -0,0 +1,87 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package filestore + +import ( + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestStoragePrefix(t *testing.T) { + bin := storageBin{ + basePath: "/base", + dirName: "testunit", + } + assert.Equal(t, "/base/testunit", bin.storagePrefix("")) + assert.Equal(t, "/base/testunit", bin.storagePrefix("/")) + assert.Equal(t, "/base/testunit/xxx", bin.storagePrefix("xxx")) + assert.Equal(t, "/base/testunit/xxx", bin.storagePrefix("/xxx")) +} + +func TestContains(t *testing.T) { + bin := storageBin{ + basePath: "/base", + dirName: "testunit", + } + assert.True(t, bin.contains("", "/base/testunit/jemoeder.txt")) + assert.True(t, bin.contains("jemoeder", "/base/testunit/jemoeder.txt")) + assert.False(t, bin.contains("jemoeder", "/base/testunit/opjehoofd/jemoeder.txt")) + assert.False(t, bin.contains("", "/etc/passwd")) + assert.False(t, bin.contains("/", "/etc/passwd")) + assert.False(t, bin.contains("/etc", "/etc/passwd")) +} + +func TestFilePermissions(t *testing.T) { + dirname, err := os.MkdirTemp("", "file-permission-test") + assert.Nil(t, err) + defer os.RemoveAll(dirname) + + bin := storageBin{ + basePath: dirname, + dirName: "testunit", + hasTempSuffix: true, + } + + file, err := bin.openForWriting("testfilename.blend") + assert.Nil(t, err) + defer file.Close() + + filestat, err := file.Stat() + assert.Nil(t, err) + + // The exact permissions depend on the current (unittest) process umask. This + // umask is not easy to get, which is why we have a copy of `tempfile.go` in + // the first place. The important part is that the permissions shouldn't be + // the default 0600 created by ioutil.TempFile() but something more permissive + // and dependent on the umask. + fileMode := uint32(filestat.Mode()) + assert.True(t, fileMode > 0600, + "Expecting more open permissions than 0o600, got %O", fileMode) + + groupWorldMode := fileMode & 0077 + assert.True(t, groupWorldMode < 0066, + "Expecting tighter group+world permissions than wide-open 0o66, got %O. "+ + "Note that this test expects a non-zero umask.", groupWorldMode) +} diff --git a/pkg/shaman/filestore/tempfile.go b/pkg/shaman/filestore/tempfile.go new file mode 100644 index 00000000..f805ea73 --- /dev/null +++ b/pkg/shaman/filestore/tempfile.go @@ -0,0 +1,89 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This is a copy of `tempfile.go` from the Go 1.14 standard library. +// It has been modified to make TempFile() respect the process' umask +// instead of creating files with 0600 permissions. This is used to +// ensure files uploaded to Shaman storage will be usable by other +// processes (like Flamenco Worker running as a different user). + +package filestore + +import ( + "os" + "path/filepath" + "strconv" + "strings" + "sync" + "time" +) + +// Random number state. +// We generate random temporary file names so that there's a good +// chance the file doesn't exist yet - keeps the number of tries in +// TempFile to a minimum. 
+var rand uint32 +var randmu sync.Mutex + +func reseed() uint32 { + return uint32(time.Now().UnixNano() + int64(os.Getpid())) +} + +func nextRandom() string { + randmu.Lock() + r := rand + if r == 0 { + r = reseed() + } + r = r*1664525 + 1013904223 // constants from Numerical Recipes + rand = r + randmu.Unlock() + return strconv.Itoa(int(1e9 + r%1e9))[1:] +} + +// TempFile creates a new temporary file in the directory dir, +// opens the file for reading and writing, and returns the resulting *os.File. +// The filename is generated by taking pattern and adding a random +// string to the end. If pattern includes a "*", the random string +// replaces the last "*". +// If dir is the empty string, TempFile uses the default directory +// for temporary files (see os.TempDir). +// Multiple programs calling TempFile simultaneously +// will not choose the same file. The caller can use f.Name() +// to find the pathname of the file. It is the caller's responsibility +// to remove the file when no longer needed. +func TempFile(dir, pattern string) (f *os.File, err error) { + if dir == "" { + dir = os.TempDir() + } + + prefix, suffix := prefixAndSuffix(pattern) + + nconflict := 0 + for i := 0; i < 10000; i++ { + name := filepath.Join(dir, prefix+nextRandom()+suffix) + f, err = os.OpenFile(name, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666) // Changed from 0600 in the standard Go code. + if os.IsExist(err) { + if nconflict++; nconflict > 10 { + randmu.Lock() + rand = reseed() + randmu.Unlock() + } + continue + } + break + } + return +} + +// prefixAndSuffix splits pattern by the last wildcard "*", if applicable, +// returning prefix as the part before "*" and suffix as the part after "*". +func prefixAndSuffix(pattern string) (prefix, suffix string) { + if pos := strings.LastIndex(pattern, "*"); pos != -1 { + prefix, suffix = pattern[:pos], pattern[pos+1:] + } else { + prefix = pattern + } + return +} diff --git a/pkg/shaman/filestore/testing.go b/pkg/shaman/filestore/testing.go new file mode 100644 index 00000000..f12b31d4 --- /dev/null +++ b/pkg/shaman/filestore/testing.go @@ -0,0 +1,116 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package filestore + +import ( + "io/ioutil" + "os" + "path" + "path/filepath" + "runtime" + "time" + + "git.blender.org/flamenco/pkg/shaman/config" +) + +// CreateTestStore returns a Store that can be used for unit testing. 
+func CreateTestStore() *Store { + tempDir, err := ioutil.TempDir("", "shaman-filestore-test-") + if err != nil { + panic(err) + } + + conf := config.Config{ + FileStorePath: tempDir, + } + storage := New(conf) + store, ok := storage.(*Store) + if !ok { + panic("storage should be *Store") + } + + return store +} + +// CleanupTestStore deletes a store returned by CreateTestStore() +func CleanupTestStore(store *Store) { + if err := os.RemoveAll(store.baseDir); err != nil { + panic(err) + } +} + +// MustStoreFileForTest allows a unit test to store some file in the 'stored' storage bin. +// Any error will cause a panic. +func (s *Store) MustStoreFileForTest(checksum string, filesize int64, contents []byte) { + file, err := s.OpenForUpload(checksum, filesize) + if err != nil { + panic(err) + } + defer file.Close() + + written, err := file.Write(contents) + if err != nil { + panic(err) + } + if written != len(contents) { + panic("short write") + } + + err = s.MoveToStored(checksum, filesize, file.Name()) + if err != nil { + panic(err) + } +} + +// LinkTestFileStore creates a copy of _test_file_store by hard-linking files into a temporary directory. +// Panics if there are any errors. +func LinkTestFileStore(cloneTo string) { + _, myFilename, _, _ := runtime.Caller(0) + fileStorePath := path.Join(path.Dir(path.Dir(myFilename)), "_test_file_store") + now := time.Now() + + visit := func(visitPath string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + relpath, err := filepath.Rel(fileStorePath, visitPath) + if err != nil { + return err + } + + targetPath := path.Join(cloneTo, relpath) + if info.IsDir() { + return os.MkdirAll(targetPath, 0755) + } + err = os.Link(visitPath, targetPath) + if err != nil { + return err + } + // Make sure we always test with fresh files by default. + return os.Chtimes(targetPath, now, now) + } + if err := filepath.Walk(fileStorePath, visit); err != nil { + panic(err) + } +} diff --git a/pkg/shaman/hasher/checksum.go b/pkg/shaman/hasher/checksum.go new file mode 100644 index 00000000..79e3c866 --- /dev/null +++ b/pkg/shaman/hasher/checksum.go @@ -0,0 +1,36 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package hasher + +import ( + "crypto/sha256" + "fmt" +) + +// Checksum computes the SHA256 sum of the data and returns it as hexadecimal string. 
+func Checksum(data []byte) string { + hasher := sha256.New() + hasher.Write(data) + hashsum := hasher.Sum(nil) + return fmt.Sprintf("%x", hashsum) +} diff --git a/pkg/shaman/hasher/checksum_test.go b/pkg/shaman/hasher/checksum_test.go new file mode 100644 index 00000000..80094797 --- /dev/null +++ b/pkg/shaman/hasher/checksum_test.go @@ -0,0 +1,44 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package hasher + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestChecksum(t *testing.T) { + assert.Equal(t, + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + Checksum([]byte{})) + assert.Equal(t, + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + Checksum(nil)) + assert.Equal(t, + "be178c0543eb17f5f3043021c9e5fcf30285e557a4fc309cce97ff9ca6182912", + Checksum([]byte("hahaha"))) + assert.Equal(t, + "05b373f2ab421a112c779258ea456c17160fcc1d0fe0bb8282de26122873f6e2", + Checksum([]byte("hähähä"))) +} diff --git a/pkg/shaman/hasher/copier.go b/pkg/shaman/hasher/copier.go new file mode 100644 index 00000000..231101b9 --- /dev/null +++ b/pkg/shaman/hasher/copier.go @@ -0,0 +1,81 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package hasher + +import ( + "crypto/sha256" + "fmt" + "io" +) + +// Copy copies from src to dst and computes a checksum on the copied bytes. +func Copy(dst io.Writer, src io.Reader) (written int64, checksum string, err error) { + hasher := sha256.New() + var buf []byte + + // copied from io.copyBuffer + if buf == nil { + size := 32 * 1024 + if l, ok := src.(*io.LimitedReader); ok && int64(size) > l.N { + if l.N < 1 { + size = 1 + } else { + size = int(l.N) + } + } + buf = make([]byte, size) + } + + // copied from io.copyBuffer + for { + nr, er := src.Read(buf) + if nr > 0 { + // Write to the hasher. I'm assuming this always works + // because there is no actual writing to anything. + hasher.Write(buf[0:nr]) + + // Write to the output writer + nw, ew := dst.Write(buf[0:nr]) + if nw > 0 { + written += int64(nw) + } + if ew != nil { + err = ew + break + } + if nr != nw { + err = io.ErrShortWrite + break + } + } + if er != nil { + if er != io.EOF { + err = er + } + break + } + } + + hashsum := hasher.Sum(nil) + return written, fmt.Sprintf("%x", hashsum), err +} diff --git a/pkg/shaman/httpserver/filefinder.go b/pkg/shaman/httpserver/filefinder.go new file mode 100644 index 00000000..680de25b --- /dev/null +++ b/pkg/shaman/httpserver/filefinder.go @@ -0,0 +1,68 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package httpserver + +import ( + "os" + "path/filepath" + + "github.com/kardianos/osext" + "github.com/sirupsen/logrus" +) + +// RootPath returns the filename prefix to find bundled files. +// Files are searched for relative to the current working directory as well as relative +// to the currently running executable. +func RootPath(fileToFind string) string { + logger := packageLogger.WithField("fileToFind", fileToFind) + + // Find as relative path, i.e. relative to CWD. + _, err := os.Stat(fileToFind) + if err == nil { + logger.Debug("found in current working directory") + return "" + } + + // Find relative to executable folder. 
+ exedirname, err := osext.ExecutableFolder() + if err != nil { + logger.WithError(err).Error("unable to determine the executable's directory") + return "" + } + + if _, err := os.Stat(filepath.Join(exedirname, fileToFind)); os.IsNotExist(err) { + cwd, err := os.Getwd() + if err != nil { + logger.WithError(err).Error("unable to determine current working directory") + } + logger.WithFields(logrus.Fields{ + "cwd": cwd, + "exedirname": exedirname, + }).Error("unable to find file") + return "" + } + + // Append a slash so that we can later just concatenate strings. + logrus.WithField("exedirname", exedirname).Debug("found file") + return exedirname + string(os.PathSeparator) +} diff --git a/pkg/shaman/httpserver/gzip.go b/pkg/shaman/httpserver/gzip.go new file mode 100644 index 00000000..a6653355 --- /dev/null +++ b/pkg/shaman/httpserver/gzip.go @@ -0,0 +1,87 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package httpserver + +import ( + "bytes" + "compress/gzip" + "errors" + "io" + "net/http" +) + +// Errors returned by DecompressedReader +var ( + ErrContentEncodingNotSupported = errors.New("Content-Encoding not supported") +) + +// wrapperCloserReader is a ReadCloser that closes both a wrapper and the wrapped reader. +type wrapperCloserReader struct { + wrapped io.ReadCloser + wrapper io.ReadCloser +} + +func (cr *wrapperCloserReader) Close() error { + errWrapped := cr.wrapped.Close() + errWrapper := cr.wrapper.Close() + + if errWrapped != nil { + return errWrapped + } + return errWrapper +} + +func (cr *wrapperCloserReader) Read(p []byte) (n int, err error) { + return cr.wrapper.Read(p) +} + +// DecompressedReader returns a reader that decompresses the body. +// The compression scheme is determined by the Content-Encoding header. +// Closing the returned reader is the caller's responsibility. +func DecompressedReader(request *http.Request) (io.ReadCloser, error) { + var wrapper io.ReadCloser + var err error + + switch request.Header.Get("Content-Encoding") { + case "gzip": + wrapper, err = gzip.NewReader(request.Body) + case "identity", "": + return request.Body, nil + default: + return nil, ErrContentEncodingNotSupported + } + + return &wrapperCloserReader{ + wrapped: request.Body, + wrapper: wrapper, + }, err +} + +// CompressBuffer GZip-compresses the payload into a buffer, and returns it. 
+func CompressBuffer(payload []byte) *bytes.Buffer { + var bodyBuf bytes.Buffer + compressor := gzip.NewWriter(&bodyBuf) + compressor.Write(payload) + compressor.Close() + return &bodyBuf +} diff --git a/pkg/shaman/httpserver/logging.go b/pkg/shaman/httpserver/logging.go new file mode 100644 index 00000000..a4ec51fa --- /dev/null +++ b/pkg/shaman/httpserver/logging.go @@ -0,0 +1,29 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package httpserver + +import ( + "github.com/sirupsen/logrus" +) + +var packageLogger = logrus.WithField("package", "shaman/httpserver") diff --git a/pkg/shaman/httpserver/testroutes.go b/pkg/shaman/httpserver/testroutes.go new file mode 100644 index 00000000..0432cce6 --- /dev/null +++ b/pkg/shaman/httpserver/testroutes.go @@ -0,0 +1,72 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package httpserver + +import ( + "fmt" + "net/http" + + "git.blender.org/flamenco/pkg/shaman/jwtauth" + "github.com/gorilla/mux" + "github.com/sirupsen/logrus" +) + +var userInfo = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + tokenSubject, ok := jwtauth.SubjectFromContext(r.Context()) + if !ok { + fmt.Fprintf(w, "You are unknown to me") + return + } + + fmt.Fprintf(w, "You are subject %s", tokenSubject) +}) + +// RegisterTestRoutes registers some routes that should only be used for testing. +func RegisterTestRoutes(r *mux.Router, auther jwtauth.Authenticator) { + // On the default page we will simply serve our static index page. + r.Handle("/", http.FileServer(http.Dir("./views/"))) + + // We will setup our server so we can serve static assest like images, css from the /static/{file} route + r.PathPrefix("/static/").Handler(http.StripPrefix("/static/", http.FileServer(http.Dir("./static/")))) + + getTokenHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + tokenString, err := auther.GenerateToken() + if err != nil { + logger := packageLogger.WithFields(logrus.Fields{ + logrus.ErrorKey: err, + "remoteAddr": r.RemoteAddr, + "requestURI": r.RequestURI, + "requestMethod": r.Method, + }) + logger.Warning("unable to sign JWT") + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(fmt.Sprintf("error signing token: %v", err))) + return + } + + w.Write([]byte(tokenString)) + }) + + r.Handle("/get-token", getTokenHandler).Methods("GET") + r.Handle("/my-info", auther.Wrap(userInfo)).Methods("GET") +} diff --git a/pkg/shaman/humanize.go b/pkg/shaman/humanize.go new file mode 100644 index 00000000..1a36e80f --- /dev/null +++ b/pkg/shaman/humanize.go @@ -0,0 +1,47 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package shaman + +import "fmt" + +var byteSizeSuffixes = []string{"B", "KiB", "MiB", "GiB", "TiB"} + +func humanizeByteSize(size int64) string { + if size < 1024 { + return fmt.Sprintf("%d B", size) + } + roundedDown := float64(size) + lastIndex := len(byteSizeSuffixes) - 1 + + for index, suffix := range byteSizeSuffixes { + if roundedDown > 1024.0 && index < lastIndex { + roundedDown /= 1024.0 + continue + } + return fmt.Sprintf("%.1f %s", roundedDown, suffix) + } + + // This line should never be reached, but at least in that + // case we should at least return something correct. 
+ return fmt.Sprintf("%d B", size) +} diff --git a/pkg/shaman/jwtauth/dummy.go b/pkg/shaman/jwtauth/dummy.go new file mode 100644 index 00000000..74e08052 --- /dev/null +++ b/pkg/shaman/jwtauth/dummy.go @@ -0,0 +1,11 @@ +package jwtauth + +// SPDX-License-Identifier: GPL-3.0-or-later + +/* This is just a dummy package. We still have to properly design authentication + * for Flamenco 3, but the ported code from Flamenco 2's Shaman implementation + * uses JWT Authentication. + */ + +type Authenticator interface { +} diff --git a/pkg/shaman/logging.go b/pkg/shaman/logging.go new file mode 100644 index 00000000..0bcfa268 --- /dev/null +++ b/pkg/shaman/logging.go @@ -0,0 +1,29 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package shaman + +import ( + "github.com/sirupsen/logrus" +) + +var packageLogger = logrus.WithField("package", "shaman") diff --git a/pkg/shaman/server.go b/pkg/shaman/server.go new file mode 100644 index 00000000..25a0f633 --- /dev/null +++ b/pkg/shaman/server.go @@ -0,0 +1,107 @@ +/* (c) 2019, Blender Foundation - Sybren A. Stüvel + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package shaman + +import ( + "sync" + + "git.blender.org/flamenco/pkg/shaman/checkout" + "git.blender.org/flamenco/pkg/shaman/config" + "git.blender.org/flamenco/pkg/shaman/fileserver" + "git.blender.org/flamenco/pkg/shaman/filestore" + "git.blender.org/flamenco/pkg/shaman/httpserver" + "git.blender.org/flamenco/pkg/shaman/jwtauth" + "github.com/gorilla/mux" +) + +// Server represents a Shaman Server. +type Server struct { + config config.Config + + auther jwtauth.Authenticator + fileStore filestore.Storage + fileServer *fileserver.FileServer + checkoutMan *checkout.Manager + + shutdownChan chan struct{} + wg sync.WaitGroup +} + +// NewServer creates a new Shaman server. +func NewServer(conf config.Config, auther jwtauth.Authenticator) *Server { + + if !conf.Enabled { + packageLogger.Warning("Shaman server is disabled") + return nil + } + + fileStore := filestore.New(conf) + checkoutMan := checkout.NewManager(conf, fileStore) + fileServer := fileserver.New(fileStore) + + shamanServer := &Server{ + conf, + auther, + fileStore, + fileServer, + checkoutMan, + + make(chan struct{}), + sync.WaitGroup{}, + } + + return shamanServer +} + +// Go starts goroutines for background operations. +// After Go() has been called, use Close() to stop those goroutines. +func (s *Server) Go() { + packageLogger.Info("Shaman server starting") + s.fileServer.Go() + + if s.config.GarbageCollect.Period == 0 { + packageLogger.Warning("garbage collection disabled, set garbageCollect.period > 0 in configuration") + } else if s.config.GarbageCollect.SilentlyDisable { + packageLogger.Debug("not starting garbage collection") + } else { + s.wg.Add(1) + go s.periodicCleanup() + } +} + +// AddRoutes adds the Shaman server endpoints to the given router. +func (s *Server) AddRoutes(router *mux.Router) { + s.checkoutMan.AddRoutes(router, s.auther) + s.fileServer.AddRoutes(router, s.auther) + + httpserver.RegisterTestRoutes(router, s.auther) +} + +// Close shuts down the Shaman server. +func (s *Server) Close() { + packageLogger.Info("shutting down Shaman server") + close(s.shutdownChan) + s.fileServer.Close() + s.checkoutMan.Close() + s.wg.Wait() +} diff --git a/pkg/shaman/touch/touch.go b/pkg/shaman/touch/touch.go new file mode 100644 index 00000000..6b3c8128 --- /dev/null +++ b/pkg/shaman/touch/touch.go @@ -0,0 +1,43 @@ +/* (c) 2019, Blender Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package touch + +import ( + "os" +) + +/* This package is a workaround for Golang issue #31880 "os.Chtimes only accepts explicit + * timestamps, does not work on SMB shares". See https://github.com/golang/go/issues/31880 + */ + +// Touch changes the file's mtime to 'now'. +func Touch(filename string) error { + if e := touch(filename); e != nil { + return &os.PathError{ + Op: "chtimes", + Path: filename, + Err: e, + } + } + return nil +} diff --git a/pkg/shaman/touch/touch_linux.go b/pkg/shaman/touch/touch_linux.go new file mode 100644 index 00000000..d3543f41 --- /dev/null +++ b/pkg/shaman/touch/touch_linux.go @@ -0,0 +1,43 @@ +/* (c) 2019, Blender Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package touch + +import ( + "syscall" + "unsafe" +) + +// touch is the same as syscall.utimes, but passes NULL as timestamp pointer (instead of a +// pointer to a concrete time). +func touch(path string) (err error) { + var _p0 *byte + _p0, err = syscall.BytePtrFromString(path) + if err != nil { + return + } + _, _, e1 := syscall.Syscall(syscall.SYS_UTIMES, uintptr(unsafe.Pointer(_p0)), uintptr(0), 0) + if e1 != 0 { + err = syscall.Errno(e1) + } + return +} diff --git a/pkg/shaman/touch/touch_nonlinux.go b/pkg/shaman/touch/touch_nonlinux.go new file mode 100644 index 00000000..55fefbbb --- /dev/null +++ b/pkg/shaman/touch/touch_nonlinux.go @@ -0,0 +1,36 @@ +// +build !linux + +/* (c) 2019, Blender Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package touch + +import ( + "os" + "time" +) + +// touch is a wrapper for os.Chtimes() passing 'now' as timestamp. +func touch(path string) (err error) { + now := time.Now() + return os.Chtimes(path, now, now) +} diff --git a/pkg/shaman/touch/touch_test.go b/pkg/shaman/touch/touch_test.go new file mode 100644 index 00000000..9b1428c7 --- /dev/null +++ b/pkg/shaman/touch/touch_test.go @@ -0,0 +1,54 @@ +/* (c) 2019, Blender Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package touch + +import ( + "io/ioutil" + "os" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestTouch(t *testing.T) { + testPath := "_touch_test.txt" + + // Create a file + assert.Nil(t, ioutil.WriteFile(testPath, []byte("just a test"), 0644)) + defer os.Remove(testPath) + + // Make it old + past := time.Now().Add(-5 * time.Hour) + assert.Nil(t, os.Chtimes(testPath, past, past)) + + // Touch & test + assert.Nil(t, Touch(testPath)) + + stat, err := os.Stat(testPath) + assert.Nil(t, err) + + threshold := time.Now().Add(-5 * time.Second) + assert.True(t, stat.ModTime().After(threshold), + "mtime should be after %v but is %v", threshold, stat.ModTime()) +}
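
The packages added above are small and largely self-contained, so a few usage sketches follow. First, the hasher package exposes Checksum() and Copy(). This is only a minimal sketch of how they could be combined, assuming the import path git.blender.org/flamenco/pkg/shaman/hasher from this patch; the expected digest for "hahaha" is taken from checksum_test.go above.

    package main

    import (
        "bytes"
        "fmt"
        "strings"

        "git.blender.org/flamenco/pkg/shaman/hasher"
    )

    func main() {
        // Checksum returns the SHA-256 digest of the data as lowercase hex.
        sum := hasher.Checksum([]byte("hahaha"))
        fmt.Println(sum) // be178c0543eb17f5f3043021c9e5fcf30285e557a4fc309cce97ff9ca6182912

        // Copy streams src into dst and hashes the bytes as they pass through,
        // so a large upload does not have to be read twice.
        var dst bytes.Buffer
        written, checksum, err := hasher.Copy(&dst, strings.NewReader("hahaha"))
        if err != nil {
            fmt.Println("copy failed:", err)
            return
        }
        fmt.Println(written, checksum == sum) // 6 true
    }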
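
The gzip helpers in httpserver are meant to pair up: CompressBuffer() prepares a request body on the client side, and DecompressedReader() unwraps it on the server side based on the Content-Encoding header. A sketch of that round trip, with an arbitrary placeholder URL:

    package main

    import (
        "fmt"
        "io"
        "net/http"

        "git.blender.org/flamenco/pkg/shaman/httpserver"
    )

    func main() {
        // Client side: gzip-compress the payload into a buffer.
        body := httpserver.CompressBuffer([]byte("file contents to upload"))

        // The server would receive an equivalent request; build one directly here.
        req, err := http.NewRequest("POST", "http://localhost:8084/files/upload", body)
        if err != nil {
            panic(err)
        }
        req.Header.Set("Content-Encoding", "gzip")

        // DecompressedReader inspects Content-Encoding and wraps the body accordingly;
        // closing the returned reader closes both the gzip reader and the request body.
        reader, err := httpserver.DecompressedReader(req)
        if err != nil {
            panic(err)
        }
        defer reader.Close()

        payload, err := io.ReadAll(reader)
        if err != nil {
            panic(err)
        }
        fmt.Printf("%s\n", payload) // file contents to upload
    }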
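
The Server type in server.go ties these packages together. The wiring below is only a sketch of the intended embedding in a Flamenco Manager: config.Config is shown with just the Enabled field (the real struct also carries storage paths and the garbage-collection settings used in Go()), and jwtauth.Authenticator is still the empty placeholder interface from dummy.go, so the routes that call into it cannot work until real authentication is designed. The port is arbitrary.

    package main

    import (
        "log"
        "net/http"

        "git.blender.org/flamenco/pkg/shaman"
        "git.blender.org/flamenco/pkg/shaman/config"
        "git.blender.org/flamenco/pkg/shaman/jwtauth"
        "github.com/gorilla/mux"
    )

    func main() {
        conf := config.Config{Enabled: true} // other fields (paths, GC period) omitted

        var auther jwtauth.Authenticator // placeholder; the interface is still empty

        // NewServer returns nil when the Shaman is disabled in the configuration.
        server := shaman.NewServer(conf, auther)
        if server == nil {
            return
        }

        router := mux.NewRouter()
        server.AddRoutes(router)

        // Go() starts the file server and, if configured, periodic garbage collection.
        server.Go()
        defer server.Close()

        log.Fatal(http.ListenAndServe(":8084", router))
    }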
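
Finally, the touch package wraps the platform-specific mtime update described in touch.go (the workaround for Go issue #31880 on SMB shares). Usage is a single call; the path below is just an example.

    package main

    import (
        "log"

        "git.blender.org/flamenco/pkg/shaman/touch"
    )

    func main() {
        // Touch sets the file's mtime to 'now'; on Linux this goes through
        // utimes(path, NULL), on other platforms through os.Chtimes().
        if err := touch.Touch("/tmp/example.blob"); err != nil {
            log.Fatal(err)
        }
    }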