mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-11-03 08:02:36 +09:00 
			
		
		
		
	Do not "guess" the file encoding/BOM when using API to upload files (#25828)
Related issue: #18368. It doesn't seem right to "guess" the file encoding/BOM when using the API to upload files. The API should save the uploaded content as-is.
This commit is contained in:
		@@ -4,7 +4,6 @@
 | 
				
			|||||||
package files
 | 
					package files
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import (
 | 
					import (
 | 
				
			||||||
	"bytes"
 | 
					 | 
				
			||||||
	"context"
 | 
						"context"
 | 
				
			||||||
	"fmt"
 | 
						"fmt"
 | 
				
			||||||
	"path"
 | 
						"path"
 | 
				
			||||||
@@ -12,21 +11,15 @@ import (
 | 
				
			|||||||
	"time"
 | 
						"time"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	"code.gitea.io/gitea/models"
 | 
						"code.gitea.io/gitea/models"
 | 
				
			||||||
	"code.gitea.io/gitea/models/db"
 | 
					 | 
				
			||||||
	git_model "code.gitea.io/gitea/models/git"
 | 
						git_model "code.gitea.io/gitea/models/git"
 | 
				
			||||||
	repo_model "code.gitea.io/gitea/models/repo"
 | 
						repo_model "code.gitea.io/gitea/models/repo"
 | 
				
			||||||
	user_model "code.gitea.io/gitea/models/user"
 | 
						user_model "code.gitea.io/gitea/models/user"
 | 
				
			||||||
	"code.gitea.io/gitea/modules/charset"
 | 
					 | 
				
			||||||
	"code.gitea.io/gitea/modules/git"
 | 
						"code.gitea.io/gitea/modules/git"
 | 
				
			||||||
	"code.gitea.io/gitea/modules/lfs"
 | 
						"code.gitea.io/gitea/modules/lfs"
 | 
				
			||||||
	"code.gitea.io/gitea/modules/log"
 | 
						"code.gitea.io/gitea/modules/log"
 | 
				
			||||||
	"code.gitea.io/gitea/modules/setting"
 | 
						"code.gitea.io/gitea/modules/setting"
 | 
				
			||||||
	"code.gitea.io/gitea/modules/structs"
 | 
						"code.gitea.io/gitea/modules/structs"
 | 
				
			||||||
	"code.gitea.io/gitea/modules/util"
 | 
					 | 
				
			||||||
	asymkey_service "code.gitea.io/gitea/services/asymkey"
 | 
						asymkey_service "code.gitea.io/gitea/services/asymkey"
 | 
				
			||||||
 | 
					 | 
				
			||||||
	stdcharset "golang.org/x/net/html/charset"
 | 
					 | 
				
			||||||
	"golang.org/x/text/transform"
 | 
					 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// IdentityOptions for a person's identity like an author or committer
 | 
					// IdentityOptions for a person's identity like an author or committer
 | 
				
			||||||
@@ -66,78 +59,9 @@ type ChangeRepoFilesOptions struct {
 | 
				
			|||||||
type RepoFileOptions struct {
 | 
					type RepoFileOptions struct {
 | 
				
			||||||
	treePath     string
 | 
						treePath     string
 | 
				
			||||||
	fromTreePath string
 | 
						fromTreePath string
 | 
				
			||||||
	encoding     string
 | 
					 | 
				
			||||||
	bom          bool
 | 
					 | 
				
			||||||
	executable   bool
 | 
						executable   bool
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func detectEncodingAndBOM(entry *git.TreeEntry, repo *repo_model.Repository) (string, bool) {
 | 
					 | 
				
			||||||
	reader, err := entry.Blob().DataAsync()
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		// return default
 | 
					 | 
				
			||||||
		return "UTF-8", false
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	defer reader.Close()
 | 
					 | 
				
			||||||
	buf := make([]byte, 1024)
 | 
					 | 
				
			||||||
	n, err := util.ReadAtMost(reader, buf)
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		// return default
 | 
					 | 
				
			||||||
		return "UTF-8", false
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	buf = buf[:n]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if setting.LFS.StartServer {
 | 
					 | 
				
			||||||
		pointer, _ := lfs.ReadPointerFromBuffer(buf)
 | 
					 | 
				
			||||||
		if pointer.IsValid() {
 | 
					 | 
				
			||||||
			meta, err := git_model.GetLFSMetaObjectByOid(db.DefaultContext, repo.ID, pointer.Oid)
 | 
					 | 
				
			||||||
			if err != nil && err != git_model.ErrLFSObjectNotExist {
 | 
					 | 
				
			||||||
				// return default
 | 
					 | 
				
			||||||
				return "UTF-8", false
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			if meta != nil {
 | 
					 | 
				
			||||||
				dataRc, err := lfs.ReadMetaObject(pointer)
 | 
					 | 
				
			||||||
				if err != nil {
 | 
					 | 
				
			||||||
					// return default
 | 
					 | 
				
			||||||
					return "UTF-8", false
 | 
					 | 
				
			||||||
				}
 | 
					 | 
				
			||||||
				defer dataRc.Close()
 | 
					 | 
				
			||||||
				buf = make([]byte, 1024)
 | 
					 | 
				
			||||||
				n, err = util.ReadAtMost(dataRc, buf)
 | 
					 | 
				
			||||||
				if err != nil {
 | 
					 | 
				
			||||||
					// return default
 | 
					 | 
				
			||||||
					return "UTF-8", false
 | 
					 | 
				
			||||||
				}
 | 
					 | 
				
			||||||
				buf = buf[:n]
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	encoding, err := charset.DetectEncoding(buf)
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		// just default to utf-8 and no bom
 | 
					 | 
				
			||||||
		return "UTF-8", false
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	if encoding == "UTF-8" {
 | 
					 | 
				
			||||||
		return encoding, bytes.Equal(buf[0:3], charset.UTF8BOM)
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	charsetEncoding, _ := stdcharset.Lookup(encoding)
 | 
					 | 
				
			||||||
	if charsetEncoding == nil {
 | 
					 | 
				
			||||||
		return "UTF-8", false
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	result, n, err := transform.String(charsetEncoding.NewDecoder(), string(buf))
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		// return default
 | 
					 | 
				
			||||||
		return "UTF-8", false
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if n > 2 {
 | 
					 | 
				
			||||||
		return encoding, bytes.Equal([]byte(result)[0:3], charset.UTF8BOM)
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return encoding, false
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// ChangeRepoFiles adds, updates or removes multiple files in the given repository
 | 
					// ChangeRepoFiles adds, updates or removes multiple files in the given repository
 | 
				
			||||||
func ChangeRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *user_model.User, opts *ChangeRepoFilesOptions) (*structs.FilesResponse, error) {
 | 
					func ChangeRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *user_model.User, opts *ChangeRepoFilesOptions) (*structs.FilesResponse, error) {
 | 
				
			||||||
	// If no branch name is set, assume default branch
 | 
						// If no branch name is set, assume default branch
 | 
				
			||||||
@@ -184,8 +108,6 @@ func ChangeRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use
 | 
				
			|||||||
		file.Options = &RepoFileOptions{
 | 
							file.Options = &RepoFileOptions{
 | 
				
			||||||
			treePath:     treePath,
 | 
								treePath:     treePath,
 | 
				
			||||||
			fromTreePath: fromTreePath,
 | 
								fromTreePath: fromTreePath,
 | 
				
			||||||
			encoding:     "UTF-8",
 | 
					 | 
				
			||||||
			bom:          false,
 | 
					 | 
				
			||||||
			executable:   false,
 | 
								executable:   false,
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		treePaths = append(treePaths, treePath)
 | 
							treePaths = append(treePaths, treePath)
 | 
				
			||||||
@@ -381,7 +303,6 @@ func handleCheckErrors(file *ChangeRepoFile, commit *git.Commit, opts *ChangeRep
 | 
				
			|||||||
			// haven't been made. We throw an error if one wasn't provided.
 | 
								// haven't been made. We throw an error if one wasn't provided.
 | 
				
			||||||
			return models.ErrSHAOrCommitIDNotProvided{}
 | 
								return models.ErrSHAOrCommitIDNotProvided{}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		file.Options.encoding, file.Options.bom = detectEncodingAndBOM(fromEntry, repo)
 | 
					 | 
				
			||||||
		file.Options.executable = fromEntry.IsExecutable()
 | 
							file.Options.executable = fromEntry.IsExecutable()
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if file.Operation == "create" || file.Operation == "update" {
 | 
						if file.Operation == "create" || file.Operation == "update" {
 | 
				
			||||||
@@ -466,28 +387,8 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	content := file.Content
 | 
						treeObjectContent := file.Content
 | 
				
			||||||
	if file.Options.bom {
 | 
					 | 
				
			||||||
		content = string(charset.UTF8BOM) + content
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	if file.Options.encoding != "UTF-8" {
 | 
					 | 
				
			||||||
		charsetEncoding, _ := stdcharset.Lookup(file.Options.encoding)
 | 
					 | 
				
			||||||
		if charsetEncoding != nil {
 | 
					 | 
				
			||||||
			result, _, err := transform.String(charsetEncoding.NewEncoder(), content)
 | 
					 | 
				
			||||||
			if err != nil {
 | 
					 | 
				
			||||||
				// Look if we can't encode back in to the original we should just stick with utf-8
 | 
					 | 
				
			||||||
				log.Error("Error re-encoding %s (%s) as %s - will stay as UTF-8: %v", file.TreePath, file.FromTreePath, file.Options.encoding, err)
 | 
					 | 
				
			||||||
				result = content
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			content = result
 | 
					 | 
				
			||||||
		} else {
 | 
					 | 
				
			||||||
			log.Error("Unknown encoding: %s", file.Options.encoding)
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	// Reset the opts.Content to our adjusted content to ensure that LFS gets the correct content
 | 
					 | 
				
			||||||
	file.Content = content
 | 
					 | 
				
			||||||
	var lfsMetaObject *git_model.LFSMetaObject
 | 
						var lfsMetaObject *git_model.LFSMetaObject
 | 
				
			||||||
 | 
					 | 
				
			||||||
	if setting.LFS.StartServer && hasOldBranch {
 | 
						if setting.LFS.StartServer && hasOldBranch {
 | 
				
			||||||
		// Check there is no way this can return multiple infos
 | 
							// Check there is no way this can return multiple infos
 | 
				
			||||||
		filename2attribute2info, err := t.gitRepo.CheckAttribute(git.CheckAttributeOpts{
 | 
							filename2attribute2info, err := t.gitRepo.CheckAttribute(git.CheckAttributeOpts{
 | 
				
			||||||
@@ -506,12 +407,12 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
 | 
				
			|||||||
				return err
 | 
									return err
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			lfsMetaObject = &git_model.LFSMetaObject{Pointer: pointer, RepositoryID: repoID}
 | 
								lfsMetaObject = &git_model.LFSMetaObject{Pointer: pointer, RepositoryID: repoID}
 | 
				
			||||||
			content = pointer.StringContent()
 | 
								treeObjectContent = pointer.StringContent()
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Add the object to the database
 | 
						// Add the object to the database
 | 
				
			||||||
	objectHash, err := t.HashObject(strings.NewReader(content))
 | 
						objectHash, err := t.HashObject(strings.NewReader(treeObjectContent))
 | 
				
			||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		return err
 | 
							return err
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user