Improve control char rendering and escape button styling (#37094)

Follow-up to #37078.

- Use Unicode Control Pictures (U+2400-U+2421) to render C0 control characters
- Make it work in diff view too
- Replace escape warning emoji with SVG
- Align escape warning button with code lines

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
silverwind
2026-04-06 13:07:33 +02:00
committed by GitHub
parent e47c6135dd
commit 423cdd4d94
37 changed files with 1561 additions and 1794 deletions
+7 -7
View File
@@ -1,4 +1,3 @@
// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT
// Copyright 2022 The Gitea Authors. All rights reserved. // Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
@@ -14,11 +13,12 @@ import (
// AmbiguousTablesForLocale provides the table of ambiguous characters for this locale. // AmbiguousTablesForLocale provides the table of ambiguous characters for this locale.
func AmbiguousTablesForLocale(locale translation.Locale) []*AmbiguousTable { func AmbiguousTablesForLocale(locale translation.Locale) []*AmbiguousTable {
ambiguousTableMap := globalVars().ambiguousTableMap
key := locale.Language() key := locale.Language()
var table *AmbiguousTable var table *AmbiguousTable
var ok bool var ok bool
for len(key) > 0 { for len(key) > 0 {
if table, ok = AmbiguousCharacters[key]; ok { if table, ok = ambiguousTableMap[key]; ok {
break break
} }
idx := strings.LastIndexAny(key, "-_") idx := strings.LastIndexAny(key, "-_")
@@ -29,18 +29,18 @@ func AmbiguousTablesForLocale(locale translation.Locale) []*AmbiguousTable {
} }
} }
if table == nil && (locale.Language() == "zh-CN" || locale.Language() == "zh_CN") { if table == nil && (locale.Language() == "zh-CN" || locale.Language() == "zh_CN") {
table = AmbiguousCharacters["zh-hans"] table = ambiguousTableMap["zh-hans"]
} }
if table == nil && strings.HasPrefix(locale.Language(), "zh") { if table == nil && strings.HasPrefix(locale.Language(), "zh") {
table = AmbiguousCharacters["zh-hant"] table = ambiguousTableMap["zh-hant"]
} }
if table == nil { if table == nil {
table = AmbiguousCharacters["_default"] table = ambiguousTableMap["_default"]
} }
return []*AmbiguousTable{ return []*AmbiguousTable{
table, table,
AmbiguousCharacters["_common"], ambiguousTableMap["_common"],
} }
} }
@@ -52,7 +52,7 @@ func isAmbiguous(r rune, confusableTo *rune, tables ...*AmbiguousTable) bool {
i := sort.Search(len(table.Confusable), func(i int) bool { i := sort.Search(len(table.Confusable), func(i int) bool {
return table.Confusable[i] >= r return table.Confusable[i] >= r
}) })
(*confusableTo) = table.With[i] *confusableTo = table.With[i]
return true return true
} }
return false return false
-188
View File
@@ -1,188 +0,0 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package main
import (
"bytes"
"flag"
"fmt"
"go/format"
"os"
"sort"
"text/template"
"unicode"
"code.gitea.io/gitea/modules/json"
"golang.org/x/text/unicode/rangetable"
)
// ambiguous.json provides a one to one mapping of ambiguous characters to other characters
// See https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json
//
// AmbiguousTable holds the confusable mapping of a single locale as it is handed to the template.
// main fills Confusable in ascending order, keeps With index-aligned with it
// (With[i] is the partner of Confusable[i]) and builds RangeTable from the Confusable runes.
type AmbiguousTable struct {
// Confusable lists the ambiguous runes
Confusable []rune
// With lists the rune each Confusable entry is mapped to, at the same index
With []rune
// Locale is the key of this table in the input JSON
Locale string
// RangeTable covers the runes in Confusable
RangeTable *unicode.RangeTable
}
// RunePair is one (confusable, partner) entry of the input list.
// main collects the pairs per locale so they can be sorted by Confusable before being split into the table slices.
type RunePair struct {
Confusable rune
With rune
}
// verbose is set by the -v flag and gates the output of verbosef
var verbose bool
// main reads the ambiguous characters JSON file (first argument, default "ambiguous.json"),
// builds one AmbiguousTable per locale and renders them with generatorTemplate
// into the output file (-o, default "ambiguous_gen.go").
// Any failure is reported through fatalf, which terminates the process.
func main() {
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, `%s: Generate AmbiguousCharacter
Usage: %[1]s [-v] [-o output.go] ambiguous.json
`, os.Args[0])
		flag.PrintDefaults()
	}

	output := ""
	flag.BoolVar(&verbose, "v", false, "verbose output")
	flag.StringVar(&output, "o", "ambiguous_gen.go", "file to output to")
	flag.Parse()

	input := flag.Arg(0)
	if input == "" {
		input = "ambiguous.json"
	}

	bs, err := os.ReadFile(input)
	if err != nil {
		fatalf("Unable to read: %s Err: %v", input, err)
	}

	// the file content is a JSON string which itself contains the JSON document, so it is decoded twice
	var unwrapped string
	if err := json.Unmarshal(bs, &unwrapped); err != nil {
		fatalf("Unable to unwrap content in: %s Err: %v", input, err)
	}

	fromJSON := map[string][]uint32{}
	if err := json.Unmarshal([]byte(unwrapped), &fromJSON); err != nil {
		fatalf("Unable to unmarshal content in: %s Err: %v", input, err)
	}

	tables := make([]*AmbiguousTable, 0, len(fromJSON))
	for locale, chars := range fromJSON {
		// chars is a flat list of (confusable, with) pairs, a dangling code point means the input is malformed;
		// report it instead of panicking with an index out of range below
		if len(chars)%2 != 0 {
			fatalf("Odd number of code points (%d) for locale %q in: %s", len(chars), locale, input)
		}

		pairs := make([]RunePair, 0, len(chars)/2)
		for i := 0; i+1 < len(chars); i += 2 {
			pairs = append(pairs, RunePair{Confusable: rune(chars[i]), With: rune(chars[i+1])})
		}

		// the tables are sorted by the confusable rune, the partner rune stays at the same index
		sort.Slice(pairs, func(i, j int) bool {
			return pairs[i].Confusable < pairs[j].Confusable
		})

		table := &AmbiguousTable{
			Locale:     locale,
			Confusable: make([]rune, 0, len(pairs)),
			With:       make([]rune, 0, len(pairs)),
		}
		for _, pair := range pairs {
			table.Confusable = append(table.Confusable, pair.Confusable)
			table.With = append(table.With, pair.With)
		}
		table.RangeTable = rangetable.New(table.Confusable...)
		tables = append(tables, table)
	}

	// map iteration order is random, sort by locale to keep the generated file stable
	sort.Slice(tables, func(i, j int) bool {
		return tables[i].Locale < tables[j].Locale
	})

	data := map[string]any{
		"Tables": tables,
	}

	if err := runTemplate(generatorTemplate, output, &data); err != nil {
		fatalf("Unable to run template: %v", err)
	}
}
// runTemplate executes t with data, gofmt-formats the result and writes it to filename.
// When filename already holds exactly the formatted output, the file is left untouched.
// It returns an error if the template fails, the output is not valid Go source,
// or the file can't be read, created, written or closed.
func runTemplate(t *template.Template, filename string, data any) error {
	buf := bytes.NewBuffer(nil)
	if err := t.Execute(buf, data); err != nil {
		return fmt.Errorf("unable to execute template: %w", err)
	}

	bs, err := format.Source(buf.Bytes())
	if err != nil {
		verbosef("Bad source:\n%s", buf.String())
		return fmt.Errorf("unable to format source: %w", err)
	}

	old, err := os.ReadFile(filename)
	if err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("failed to read old file %s because %w", filename, err)
	}
	if err == nil && bytes.Equal(bs, old) {
		// files are the same don't rewrite it.
		return nil
	}

	file, err := os.Create(filename)
	if err != nil {
		return fmt.Errorf("failed to create file %s because %w", filename, err)
	}
	if _, err = file.Write(bs); err != nil {
		// the write error is the one worth reporting, the close result adds nothing here
		_ = file.Close()
		return fmt.Errorf("unable to write generated source: %w", err)
	}
	// a failing Close means the data may not have reached the file, so it must not be ignored
	if err = file.Close(); err != nil {
		return fmt.Errorf("unable to write generated source: %w", err)
	}
	return nil
}
// generatorTemplate renders the generated Go source for package charset:
// the AmbiguousTable type declaration and the AmbiguousCharacters map,
// with one map entry per element of the "Tables" value passed as template data.
var generatorTemplate = template.Must(template.New("ambiguousTemplate").Parse(`// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package charset
import "unicode"
// This file is generated from https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json
// AmbiguousTable matches a confusable rune with its partner for the Locale
type AmbiguousTable struct {
Confusable []rune
With []rune
Locale string
RangeTable *unicode.RangeTable
}
// AmbiguousCharacters provides a map by locale name to the confusable characters in that locale
var AmbiguousCharacters = map[string]*AmbiguousTable{
{{range .Tables}}{{printf "%q:" .Locale}} {
Confusable: []rune{ {{range .Confusable}}{{.}},{{end}} },
With: []rune{ {{range .With}}{{.}},{{end}} },
Locale: {{printf "%q" .Locale}},
RangeTable: &unicode.RangeTable{
R16: []unicode.Range16{
{{range .RangeTable.R16 }} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
{{end}} },
R32: []unicode.Range32{
{{range .RangeTable.R32}} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
{{end}} },
LatinOffset: {{.RangeTable.LatinOffset}},
},
},
{{end}}
}
`))
// logf formats the message and prints it as a single line on stderr.
func logf(format string, args ...any) {
	line := fmt.Sprintf(format, args...)
	fmt.Fprintln(os.Stderr, line)
}
// verbosef behaves like logf, but stays silent unless verbose output was requested.
func verbosef(format string, args ...any) {
	if !verbose {
		return
	}
	logf(format, args...)
}
// fatalf prints the formatted message with a "fatal: " prefix on stderr and exits with status 1.
func fatalf(format string, args ...any) {
	// logf already terminates the line, appending another "\n" here would print a stray empty line
	logf("fatal: "+format, args...)
	os.Exit(1)
}
File diff suppressed because one or more lines are too long
+7 -1
View File
@@ -8,11 +8,13 @@ import (
"testing" "testing"
"unicode" "unicode"
"code.gitea.io/gitea/modules/translation"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func TestAmbiguousCharacters(t *testing.T) { func TestAmbiguousCharacters(t *testing.T) {
for locale, ambiguous := range AmbiguousCharacters { for locale, ambiguous := range globalVars().ambiguousTableMap {
assert.Equal(t, locale, ambiguous.Locale) assert.Equal(t, locale, ambiguous.Locale)
assert.Len(t, ambiguous.With, len(ambiguous.Confusable)) assert.Len(t, ambiguous.With, len(ambiguous.Confusable))
assert.True(t, sort.SliceIsSorted(ambiguous.Confusable, func(i, j int) bool { assert.True(t, sort.SliceIsSorted(ambiguous.Confusable, func(i, j int) bool {
@@ -28,4 +30,8 @@ func TestAmbiguousCharacters(t *testing.T) {
assert.True(t, found, "%c is not in %d", confusable, i) assert.True(t, found, "%c is not in %d", confusable, i)
} }
} }
var confusableTo rune
ret := isAmbiguous('𝐾', &confusableTo, AmbiguousTablesForLocale(&translation.MockLocale{})...)
assert.True(t, ret)
} }
-43
View File
@@ -1,43 +0,0 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package charset
import (
"bytes"
"io"
)
// BreakWriter decorates an io.Writer so that every '\n' written through it comes out as '<br>'
type BreakWriter struct {
	io.Writer
}

// Write passes bs on to the wrapped writer, emitting '<br>' in place of each '\n'.
// The returned count is measured in input bytes: a replaced newline counts as one byte,
// and on error it is the number of input bytes handled so far.
func (b *BreakWriter) Write(bs []byte) (n int, err error) {
	remaining := bs
	for len(remaining) > 0 {
		nl := bytes.IndexByte(remaining, '\n')
		if nl < 0 {
			// no newline left, the tail goes out unchanged
			written, werr := b.Writer.Write(remaining)
			return n + written, werr
		}
		if nl > 0 {
			// the text in front of the newline
			written, werr := b.Writer.Write(remaining[:nl])
			n += written
			if werr != nil {
				return n, werr
			}
		}
		if _, werr := b.Writer.Write([]byte("<br>")); werr != nil {
			return n, werr
		}
		// the newline itself has been consumed
		n++
		remaining = remaining[nl+1:]
	}
	return n, nil
}
-68
View File
@@ -1,68 +0,0 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package charset
import (
"strings"
"testing"
)
// TestBreakWriter_Write verifies that BreakWriter outputs '<br>' for each '\n'
// and reports the length of the input as the number of bytes written.
func TestBreakWriter_Write(t *testing.T) {
	type testCase struct {
		name    string
		kase    string
		expect  string
		wantErr bool
	}
	cases := []testCase{
		{name: "noline", kase: "abcdefghijklmnopqrstuvwxyz", expect: "abcdefghijklmnopqrstuvwxyz"},
		{name: "endline", kase: "abcdefghijklmnopqrstuvwxyz\n", expect: "abcdefghijklmnopqrstuvwxyz<br>"},
		{name: "startline", kase: "\nabcdefghijklmnopqrstuvwxyz", expect: "<br>abcdefghijklmnopqrstuvwxyz"},
		{name: "onlyline", kase: "\n\n\n", expect: "<br><br><br>"},
		{name: "empty", kase: "", expect: ""},
		{name: "midline", kase: "\nabc\ndefghijkl\nmnopqrstuvwxy\nz", expect: "<br>abc<br>defghijkl<br>mnopqrstuvwxy<br>z"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			out := &strings.Builder{}
			w := &BreakWriter{Writer: out}
			n, err := w.Write([]byte(tc.kase))
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Errorf("BreakWriter.Write() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if want := len(tc.kase); n != want {
				t.Errorf("BreakWriter.Write() = %v, want %v", n, want)
			}
			if got := out.String(); got != tc.expect {
				t.Errorf("BreakWriter.Write() wrote %q, want %v", got, tc.expect)
			}
		})
	}
}
+17 -3
View File
@@ -6,7 +6,10 @@ package charset
import ( import (
"bytes" "bytes"
"io" "io"
"regexp"
"strings" "strings"
"sync"
"unicode"
"unicode/utf8" "unicode/utf8"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
@@ -17,8 +20,19 @@ import (
"golang.org/x/text/transform" "golang.org/x/text/transform"
) )
// UTF8BOM is the utf-8 byte-order marker var globalVars = sync.OnceValue(func() (ret struct {
var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'} utf8Bom []byte
defaultWordRegexp *regexp.Regexp
ambiguousTableMap map[string]*AmbiguousTable
invisibleRangeTable *unicode.RangeTable
},
) {
ret.utf8Bom = []byte{'\xef', '\xbb', '\xbf'}
ret.ambiguousTableMap = newAmbiguousTableMap()
ret.invisibleRangeTable = newInvisibleRangeTable()
return ret
})
type ConvertOpts struct { type ConvertOpts struct {
KeepBOM bool KeepBOM bool
@@ -105,7 +119,7 @@ func maybeRemoveBOM(content []byte, opts ConvertOpts) []byte {
if opts.KeepBOM { if opts.KeepBOM {
return content return content
} }
return bytes.TrimPrefix(content, UTF8BOM) return bytes.TrimPrefix(content, globalVars().utf8Bom)
} }
// DetectEncoding detect the encoding of content // DetectEncoding detect the encoding of content
+18 -22
View File
@@ -1,10 +1,6 @@
// Copyright 2022 The Gitea Authors. All rights reserved. // Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
//go:generate go run invisible/generate.go -v -o ./invisible_gen.go
//go:generate go run ambiguous/generate.go -v -o ./ambiguous_gen.go ambiguous/ambiguous.json
package charset package charset
import ( import (
@@ -12,36 +8,36 @@ import (
"io" "io"
"strings" "strings"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/translation" "code.gitea.io/gitea/modules/translation"
) )
// RuneNBSP is the codepoint for NBSP type EscapeOptions struct {
const RuneNBSP = 0xa0 Allowed map[rune]bool
}
// AllowRuneNBSP returns a new allow-set that contains only the no-break space (U+00A0)
func AllowRuneNBSP() map[rune]bool {
	allowed := make(map[rune]bool, 1)
	allowed['\u00a0'] = true
	return allowed
}
// EscapeOptionsForView returns the escape options for the file view, where only NBSP is allowed
func EscapeOptionsForView() EscapeOptions {
	var opts EscapeOptions
	// it's safe to see NBSP in the view, but maybe not in the diff
	opts.Allowed = AllowRuneNBSP()
	return opts
}
// EscapeControlHTML escapes the Unicode control sequences in a provided html document // EscapeControlHTML escapes the Unicode control sequences in a provided html document
func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) { func EscapeControlHTML(html template.HTML, locale translation.Locale, opts ...EscapeOptions) (escaped *EscapeStatus, output template.HTML) {
if !setting.UI.AmbiguousUnicodeDetection { if !setting.UI.AmbiguousUnicodeDetection {
return &EscapeStatus{}, html return &EscapeStatus{}, html
} }
sb := &strings.Builder{} sb := &strings.Builder{}
escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, opts...) // err has been handled in EscapeControlReader
return escaped, template.HTML(sb.String()) return escaped, template.HTML(sb.String())
} }
// EscapeControlReader escapes the Unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus // EscapeControlReader escapes the Unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) { func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, opts ...EscapeOptions) (*EscapeStatus, error) {
if !setting.UI.AmbiguousUnicodeDetection { return escapeStream(locale, reader, writer, opts...)
_, err = io.Copy(writer, reader)
return &EscapeStatus{}, err
}
outputStream := &HTMLStreamerWriter{Writer: writer}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
if err = StreamHTML(reader, streamer); err != nil {
streamer.escaped.HasError = true
log.Error("Error whilst escaping: %v", err)
}
return streamer.escaped, err
} }
+2 -6
View File
@@ -3,11 +3,9 @@
package charset package charset
// EscapeStatus represents the findings of the unicode escaper // EscapeStatus represents the findings of the Unicode escaper
type EscapeStatus struct { type EscapeStatus struct {
Escaped bool Escaped bool // it means that some characters were escaped, and they can also be unescaped back
HasError bool
HasBadRunes bool
HasInvisible bool HasInvisible bool
HasAmbiguous bool HasAmbiguous bool
} }
@@ -19,8 +17,6 @@ func (status *EscapeStatus) Or(other *EscapeStatus) *EscapeStatus {
st = &EscapeStatus{} st = &EscapeStatus{}
} }
st.Escaped = st.Escaped || other.Escaped st.Escaped = st.Escaped || other.Escaped
st.HasError = st.HasError || other.HasError
st.HasBadRunes = st.HasBadRunes || other.HasBadRunes
st.HasAmbiguous = st.HasAmbiguous || other.HasAmbiguous st.HasAmbiguous = st.HasAmbiguous || other.HasAmbiguous
st.HasInvisible = st.HasInvisible || other.HasInvisible st.HasInvisible = st.HasInvisible || other.HasInvisible
return st return st
+353 -226
View File
@@ -4,288 +4,415 @@
package charset package charset
import ( import (
"bytes"
"fmt" "fmt"
"regexp" "html"
"strings" "io"
"unicode" "unicode"
"unicode/utf8" "unicode/utf8"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/translation" "code.gitea.io/gitea/modules/translation"
"golang.org/x/net/html"
) )
// VScode defaultWordRegexp type htmlChunkReader struct {
var defaultWordRegexp = regexp.MustCompile(`(-?\d*\.\d\w*)|([^\` + "`" + `\~\!\@\#\$\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\"\,\.\<\>\/\?\s\x00-\x1f]+)`) in io.Reader
readErr error
func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer, allowed ...rune) HTMLStreamer { readBuf []byte
allowedM := make(map[rune]bool, len(allowed)) curInTag bool
for _, v := range allowed {
allowedM[v] = true
}
return &escapeStreamer{
escaped: &EscapeStatus{},
PassthroughHTMLStreamer: *NewPassthroughStreamer(next),
locale: locale,
ambiguousTables: AmbiguousTablesForLocale(locale),
allowed: allowedM,
}
} }
type escapeStreamer struct { type escapeStreamer struct {
PassthroughHTMLStreamer htmlChunkReader
escaped *EscapeStatus escaped *EscapeStatus
locale translation.Locale locale translation.Locale
ambiguousTables []*AmbiguousTable ambiguousTables []*AmbiguousTable
allowed map[rune]bool allowed map[rune]bool
out io.Writer
} }
func (e *escapeStreamer) EscapeStatus() *EscapeStatus { func escapeStream(locale translation.Locale, in io.Reader, out io.Writer, opts ...EscapeOptions) (*EscapeStatus, error) {
return e.escaped es := &escapeStreamer{
escaped: &EscapeStatus{},
locale: locale,
ambiguousTables: AmbiguousTablesForLocale(locale),
htmlChunkReader: htmlChunkReader{
in: in,
readBuf: make([]byte, 0, 32*1024),
},
out: out,
}
if len(opts) > 0 {
es.allowed = opts[0].Allowed
}
readCount := 0
lastIsTag := false
for {
parts, partInTag, err := es.readRunes()
readCount++
if err == io.EOF {
return es.escaped, nil
} else if err != nil {
return nil, err
}
for i, part := range parts {
if partInTag[i] {
lastIsTag = true
if _, err := out.Write(part); err != nil {
return nil, err
}
} else {
// if last part is tag, then this part is content begin
// if the content is the first part of the first read, then it's also content begin
isContentBegin := lastIsTag || (readCount == 1 && i == 0)
lastIsTag = false
if isContentBegin {
if part, err = es.trimAndWriteBom(part); err != nil {
return nil, err
}
}
if err = es.detectAndWriteRunes(part); err != nil {
return nil, err
}
}
}
}
} }
// Text tells the next streamer there is a text func (e *escapeStreamer) trimAndWriteBom(part []byte) ([]byte, error) {
func (e *escapeStreamer) Text(data string) error { remaining, ok := bytes.CutPrefix(part, globalVars().utf8Bom)
sb := &strings.Builder{} if ok {
var until int part = remaining
var next int if _, err := e.out.Write(globalVars().utf8Bom); err != nil {
return part, err
}
}
return part, nil
}
// longSentenceDetectionLimit is how many runes possibleLongSentence looks at before and after a position;
// analyzeDetectResults doesn't run that check at all when there are fewer runes than this.
const longSentenceDetectionLimit = 20
// possibleLongSentence reports whether at least half of the counted runes around pos
// (longSentenceDetectionLimit runes on each side) are non-ASCII or ambiguous.
// Spaces are not counted, only basic, non-ASCII and ambiguous runes are.
func (e *escapeStreamer) possibleLongSentence(results []detectResult, pos int) bool {
	var basicCount, nonASCIICount int
	from := max(pos-longSentenceDetectionLimit, 0)
	to := min(pos+longSentenceDetectionLimit, len(results))
	for i := from; i < to; i++ {
		switch results[i].runeType {
		case runeTypeBasic:
			if results[i].runeChar != ' ' {
				basicCount++
			}
		case runeTypeNonASCII, runeTypeAmbiguous:
			nonASCIICount++
		}
	}
	total := basicCount + nonASCIICount
	if total == 0 {
		return false
	}
	// many non-ASCII runes around, it seems to be a sentence,
	// don't handle the invisible/ambiguous chars in it, otherwise it will be too noisy
	return nonASCIICount*100/total >= 50
}
// analyzeDetectResults decides for each invisible or ambiguous rune whether it needs to be escaped.
// A rune with a non-ASCII or ambiguous direct neighbor is left alone, and so is a rune inside
// what looks like a long non-ASCII sentence; the others get needEscape from the
// AmbiguousUnicodeDetection setting.
func (e *escapeStreamer) analyzeDetectResults(results []detectResult) {
	// neighborIsNonASCII reports whether idx is a valid index of a non-ASCII or ambiguous rune
	neighborIsNonASCII := func(idx int) bool {
		if idx < 0 || idx >= len(results) {
			return false
		}
		t := results[idx].runeType
		return t == runeTypeNonASCII || t == runeTypeAmbiguous
	}
	shortInput := len(results) < longSentenceDetectionLimit
	for i := range results {
		cur := &results[i]
		if cur.runeType != runeTypeInvisible && cur.runeType != runeTypeAmbiguous {
			continue
		}
		if neighborIsNonASCII(i-1) || neighborIsNonASCII(i+1) {
			continue
		}
		if shortInput || !e.possibleLongSentence(results, i) {
			cur.needEscape = setting.UI.AmbiguousUnicodeDetection
		}
	}
}
// detectAndWriteRunes handles one content part in three steps: classify its runes (detectRunes),
// decide which of them need escaping (analyzeDetectResults), then write the part out (writeDetectResults).
// It returns the error of the write step.
func (e *escapeStreamer) detectAndWriteRunes(part []byte) error {
results := e.detectRunes(part)
e.analyzeDetectResults(results)
return e.writeDetectResults(part, results)
}
func (e *htmlChunkReader) readRunes() (parts [][]byte, partInTag []bool, _ error) {
// we have read everything, eof
if e.readErr != nil && len(e.readBuf) == 0 {
return nil, nil, e.readErr
}
// not eof, and there is space in the buffer, try to read more data
if e.readErr == nil && len(e.readBuf) <= cap(e.readBuf)*3/4 {
n, err := e.in.Read(e.readBuf[len(e.readBuf):cap(e.readBuf)])
e.readErr = err
e.readBuf = e.readBuf[:len(e.readBuf)+n]
}
if len(e.readBuf) == 0 {
return nil, nil, e.readErr
}
// try to extract tag parts and content parts
pos := 0 pos := 0
if len(data) > len(UTF8BOM) && data[:len(UTF8BOM)] == string(UTF8BOM) { for pos < len(e.readBuf) {
_, _ = sb.WriteString(data[:len(UTF8BOM)]) var curPartEnd int
pos = len(UTF8BOM) nextInTag := e.curInTag
} if e.curInTag {
dataBytes := []byte(data) // if cur part is in tag, try to find the tag close char '>'
for pos < len(data) { idx := bytes.IndexByte(e.readBuf[pos:], '>')
nextIdxs := defaultWordRegexp.FindStringIndex(data[pos:]) if idx == -1 {
if nextIdxs == nil { // if no tag close char, then the whole buffer is in tag
until = len(data) curPartEnd = len(e.readBuf)
next = until } else {
} else { // tag part ends, switch to content part
until = min(nextIdxs[0]+pos, len(data)) curPartEnd = pos + idx + 1
next = min(nextIdxs[1]+pos, len(data)) nextInTag = !nextInTag
}
// from pos until we know that the runes are not \r\t\n or even ' '
n := next - until
runes := make([]rune, 0, n)
positions := make([]int, 0, n+1)
for pos < until {
r, sz := utf8.DecodeRune(dataBytes[pos:])
positions = positions[:0]
positions = append(positions, pos, pos+sz)
types, confusables, _ := e.runeTypes(r)
if err := e.handleRunes(dataBytes, []rune{r}, positions, types, confusables, sb); err != nil {
return err
}
pos += sz
}
for i := pos; i < next; {
r, sz := utf8.DecodeRune(dataBytes[i:])
runes = append(runes, r)
positions = append(positions, i)
i += sz
}
positions = append(positions, next)
types, confusables, runeCounts := e.runeTypes(runes...)
if runeCounts.needsEscape() {
if err := e.handleRunes(dataBytes, runes, positions, types, confusables, sb); err != nil {
return err
} }
} else { } else {
_, _ = sb.Write(dataBytes[pos:next]) // if cur part is in content, try to find the tag open char '<'
idx := bytes.IndexByte(e.readBuf[pos:], '<')
if idx == -1 {
// if no tag open char, then the whole buffer is in content
curPartEnd = len(e.readBuf)
} else {
// content part ends, switch to tag part
curPartEnd = pos + idx
nextInTag = !nextInTag
}
}
curPartLen := curPartEnd - pos
if curPartLen == 0 {
// if cur part is empty, only need to switch the part type
if e.curInTag == nextInTag {
panic("impossible, curPartLen is 0 but the part in tag status is not switched")
}
e.curInTag = nextInTag
continue
}
// now, curPartLen can't be 0
curPart := make([]byte, curPartLen)
copy(curPart, e.readBuf[pos:curPartEnd])
// now we get the curPart bytes, but we can't directly use it, the last rune in it might have been cut
// try to decode the last rune, if it's invalid, then we cut the last byte and try again until we get a valid rune or no byte left
for i := curPartLen - 1; i >= 0; i-- {
last, lastSize := utf8.DecodeRune(curPart[i:])
if last == utf8.RuneError && lastSize == 1 {
curPartLen--
} else {
curPartLen += lastSize - 1
break
}
}
if curPartLen == 0 {
// actually it's impossible that the part doesn't contain any valid rune,
// the only case is that the cap(readBuf) is too small, or the original content indeed doesn't contain any valid rune
// * try to leave the last 4 bytes (possible longest utf-8 encoding) to next round
// * at least consume 1 byte to avoid infinite loop
curPartLen = max(len(curPart)-utf8.UTFMax, 1)
}
// if curPartLen is not the same as curPart, it means we have cut some bytes,
// need to wait for more data if not eof
trailingCorrupted := curPartLen != len(curPart)
// finally, we get the real part we need
curPart = curPart[:curPartLen]
parts = append(parts, curPart)
partInTag = append(partInTag, e.curInTag)
pos += curPartLen
e.curInTag = nextInTag
if trailingCorrupted && e.readErr == nil {
// if the last part is corrupted, and we haven't reached eof, then we need to wait for more data to get the complete part
break
} }
pos = next
} }
if sb.Len() > 0 {
if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil { copy(e.readBuf, e.readBuf[pos:])
e.readBuf = e.readBuf[:len(e.readBuf)-pos]
return parts, partInTag, nil
}
func (e *escapeStreamer) writeDetectResults(data []byte, results []detectResult) error {
lastWriteRawIdx := -1
for idx := range results {
res := &results[idx]
if !res.needEscape {
if lastWriteRawIdx == -1 {
lastWriteRawIdx = idx
}
continue
}
if lastWriteRawIdx != -1 {
if _, err := e.out.Write(data[results[lastWriteRawIdx].position:res.position]); err != nil {
return err
}
lastWriteRawIdx = -1
}
switch res.runeType {
case runeTypeBroken:
if err := e.writeBrokenRune(data[res.position : res.position+res.runeSize]); err != nil {
return err
}
case runeTypeAmbiguous:
if err := e.writeAmbiguousRune(res.runeChar, res.confusable); err != nil {
return err
}
case runeTypeInvisible:
if err := e.writeInvisibleRune(res.runeChar); err != nil {
return err
}
case runeTypeControlChar:
if err := e.writeControlRune(res.runeChar); err != nil {
return err
}
default:
panic("unreachable")
}
}
if lastWriteRawIdx != -1 {
lastResult := results[len(results)-1]
if _, err := e.out.Write(data[results[lastWriteRawIdx].position : lastResult.position+lastResult.runeSize]); err != nil {
return err return err
} }
} }
return nil return nil
} }
func (e *escapeStreamer) handleRunes(data []byte, runes []rune, positions []int, types []runeType, confusables []rune, sb *strings.Builder) error { func (e *escapeStreamer) writeBrokenRune(_ []byte) (err error) {
for i, r := range runes { // Although we'd like to use the original bytes to display (show the real broken content to users),
switch types[i] { // however, when this "escape stream" module is applied to the content, the content has already been processed by other modules.
case brokenRuneType: // So the invalid bytes just can't be kept till this step, in most (all) cases, the only thing we see here is utf8.RuneError
if sb.Len() > 0 { _, err = io.WriteString(e.out, `<span class="broken-code-point"></span>`)
if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil { return err
return err
}
sb.Reset()
}
end := positions[i+1]
start := positions[i]
if err := e.brokenRune(data[start:end]); err != nil {
return err
}
case ambiguousRuneType:
if sb.Len() > 0 {
if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
return err
}
sb.Reset()
}
if err := e.ambiguousRune(r, confusables[0]); err != nil {
return err
}
confusables = confusables[1:]
case invisibleRuneType:
if sb.Len() > 0 {
if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
return err
}
sb.Reset()
}
if err := e.invisibleRune(r); err != nil {
return err
}
default:
_, _ = sb.WriteRune(r)
}
}
return nil
} }
func (e *escapeStreamer) brokenRune(bs []byte) error { func (e *escapeStreamer) writeEscapedCharHTML(tag1, attr, tag2, content, tag3 string) (err error) {
e.escaped.Escaped = true _, err = io.WriteString(e.out, tag1)
e.escaped.HasBadRunes = true if err != nil {
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
Key: "class",
Val: "broken-code-point",
}); err != nil {
return err return err
} }
if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("<%X>", bs)); err != nil { _, err = io.WriteString(e.out, html.EscapeString(attr))
if err != nil {
return err return err
} }
_, err = io.WriteString(e.out, tag2)
return e.PassthroughHTMLStreamer.EndTag("span") if err != nil {
return err
}
_, err = io.WriteString(e.out, html.EscapeString(content))
if err != nil {
return err
}
_, err = io.WriteString(e.out, tag3)
return err
} }
func (e *escapeStreamer) ambiguousRune(r, c rune) error { func runeToHex(r rune) string {
return fmt.Sprintf("[U+%04X]", r)
}
func (e *escapeStreamer) writeAmbiguousRune(r, c rune) (err error) {
e.escaped.Escaped = true e.escaped.Escaped = true
e.escaped.HasAmbiguous = true e.escaped.HasAmbiguous = true
return e.writeEscapedCharHTML(
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ `<span class="ambiguous-code-point" data-tooltip-content="`,
Key: "class", e.locale.TrString("repo.ambiguous_character", string(r)+" "+runeToHex(r), string(c)+" "+runeToHex(c)),
Val: "ambiguous-code-point", `"><span class="char">`,
}, html.Attribute{ string(r),
Key: "data-tooltip-content", `</span></span>`,
Val: e.locale.TrString("repo.ambiguous_character", r, c), )
}); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
Key: "class",
Val: "char",
}); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil {
return err
}
return e.PassthroughHTMLStreamer.EndTag("span")
} }
func (e *escapeStreamer) invisibleRune(r rune) error { func (e *escapeStreamer) writeInvisibleRune(r rune) error {
e.escaped.Escaped = true e.escaped.Escaped = true
e.escaped.HasInvisible = true e.escaped.HasInvisible = true
return e.writeEscapedCharHTML(
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ `<span class="escaped-code-point" data-escaped="`,
Key: "class", runeToHex(r),
Val: "escaped-code-point", `"><span class="char">`,
}, html.Attribute{ string(r),
Key: "data-escaped", `</span></span>`,
Val: fmt.Sprintf("[U+%04X]", r), )
}); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
Key: "class",
Val: "char",
}); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil {
return err
}
return e.PassthroughHTMLStreamer.EndTag("span")
} }
type runeCountType struct { func (e *escapeStreamer) writeControlRune(r rune) error {
numBasicRunes int var display string
numNonConfusingNonBasicRunes int if r >= 0 && r <= 0x1f {
numAmbiguousRunes int display = string(0x2400 + r)
numInvisibleRunes int } else if r == 0x7f {
numBrokenRunes int display = string(rune(0x2421))
} else {
display = runeToHex(r)
}
return e.writeEscapedCharHTML(
`<span class="broken-code-point" data-escaped="`,
display,
`"><span class="char">`,
string(r),
`</span></span>`,
)
} }
func (counts runeCountType) needsEscape() bool { type detectResult struct {
if counts.numBrokenRunes > 0 { runeChar rune
return true runeType int
} runeSize int
if counts.numBasicRunes == 0 && position int
counts.numNonConfusingNonBasicRunes > 0 { confusable rune
return false needEscape bool
}
return counts.numAmbiguousRunes > 0 || counts.numInvisibleRunes > 0
} }
type runeType int
const ( const (
basicASCIIRuneType runeType = iota // <- This is technically deadcode but its self-documenting so it should stay runeTypeBasic int = iota
brokenRuneType runeTypeBroken
nonBasicASCIIRuneType runeTypeNonASCII
ambiguousRuneType runeTypeAmbiguous
invisibleRuneType runeTypeInvisible
runeTypeControlChar
) )
func (e *escapeStreamer) runeTypes(runes ...rune) (types []runeType, confusables []rune, runeCounts runeCountType) { func (e *escapeStreamer) detectRunes(data []byte) []detectResult {
types = make([]runeType, len(runes)) runeCount := utf8.RuneCount(data)
for i, r := range runes { results := make([]detectResult, runeCount)
var confusable rune invisibleRangeTable := globalVars().invisibleRangeTable
var i int
var confusable rune
for pos := 0; pos < len(data); i++ {
r, runeSize := utf8.DecodeRune(data[pos:])
results[i].runeChar = r
results[i].runeSize = runeSize
results[i].position = pos
pos += runeSize
switch { switch {
case r == utf8.RuneError: case r == utf8.RuneError:
types[i] = brokenRuneType results[i].runeType = runeTypeBroken
runeCounts.numBrokenRunes++ results[i].needEscape = true
case r == ' ' || r == '\t' || r == '\n': case r == ' ' || r == '\t' || r == '\n' || e.allowed[r]:
runeCounts.numBasicRunes++ results[i].runeType = runeTypeBasic
case e.allowed[r]: if r >= 0x80 {
if r > 0x7e || r < 0x20 { results[i].runeType = runeTypeNonASCII
types[i] = nonBasicASCIIRuneType
runeCounts.numNonConfusingNonBasicRunes++
} else {
runeCounts.numBasicRunes++
} }
case unicode.Is(InvisibleRanges, r): case r < 0x20 || r == 0x7f:
types[i] = invisibleRuneType results[i].runeType = runeTypeControlChar
runeCounts.numInvisibleRunes++ results[i].needEscape = true
case unicode.IsControl(r): case unicode.Is(invisibleRangeTable, r):
types[i] = invisibleRuneType results[i].runeType = runeTypeInvisible
runeCounts.numInvisibleRunes++ // not sure about results[i].needEscape, will be detected separately
case isAmbiguous(r, &confusable, e.ambiguousTables...): case isAmbiguous(r, &confusable, e.ambiguousTables...):
confusables = append(confusables, confusable) results[i].runeType = runeTypeAmbiguous
types[i] = ambiguousRuneType results[i].confusable = confusable
runeCounts.numAmbiguousRunes++ // not sure about results[i].needEscape, will be detected separately
case r > 0x7e || r < 0x20: case r >= 0x80:
types[i] = nonBasicASCIIRuneType results[i].runeType = runeTypeNonASCII
runeCounts.numNonConfusingNonBasicRunes++ default: // details to basic runes
default:
runeCounts.numBasicRunes++
} }
} }
return types, confusables, runeCounts return results
} }
+65 -34
View File
@@ -4,7 +4,6 @@
package charset package charset
import ( import (
"regexp"
"strings" "strings"
"testing" "testing"
@@ -13,6 +12,7 @@ import (
"code.gitea.io/gitea/modules/translation" "code.gitea.io/gitea/modules/translation"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
) )
type escapeControlTest struct { type escapeControlTest struct {
@@ -57,24 +57,24 @@ var escapeControlTests = []escapeControlTest{
status: EscapeStatus{}, status: EscapeStatus{},
}, },
{ {
name: "hebrew", name: "hebrew", // old test was wrong, such text shouldn't be escaped
text: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה", text: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה",
result: `עד תקופת <span class="ambiguous-code-point" data-tooltip-content="repo.ambiguous_character"><span class="char">י</span></span><span class="ambiguous-code-point" data-tooltip-content="repo.ambiguous_character"><span class="char">ו</span></span><span class="ambiguous-code-point" data-tooltip-content="repo.ambiguous_character"><span class="char">ו</span></span><span class="ambiguous-code-point" data-tooltip-content="repo.ambiguous_character"><span class="char">ן</span></span> העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו&#39;. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה`, result: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה",
status: EscapeStatus{Escaped: true, HasAmbiguous: true}, status: EscapeStatus{},
}, },
{ {
name: "more hebrew", name: "more hebrew", // old test was wrong, such text shouldn't be escaped
text: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד. text: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד.
המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף.
בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`,
result: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( &#39; ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד. result: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד.
המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה&#34;<span class="ambiguous-code-point" data-tooltip-content="repo.ambiguous_character"><span class="char">ס</span></span> - 546 לפנה&#34;<span class="ambiguous-code-point" data-tooltip-content="repo.ambiguous_character"><span class="char">ס</span></span> בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף.
בשנים 582 לפנה&#34;<span class="ambiguous-code-point" data-tooltip-content="repo.ambiguous_character"><span class="char">ס</span></span> עד 496 לפנה&#34;<span class="ambiguous-code-point" data-tooltip-content="repo.ambiguous_character"><span class="char">ס</span></span>, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש&#34;הכל מספר&#34;, או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים &#34;חסרי מידה משותפת&#34;, ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג &#34;תאיטיטוס&#34; של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה&#34;<span class="ambiguous-code-point" data-tooltip-content="repo.ambiguous_character"><span class="char">ס</span></span> להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. 
ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`,
status: EscapeStatus{Escaped: true, HasAmbiguous: true}, status: EscapeStatus{},
}, },
{ {
name: "Mixed RTL+LTR", name: "Mixed RTL+LTR",
@@ -111,7 +111,7 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
{ {
name: "CVE testcase", name: "CVE testcase",
text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {", text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {",
result: `if access_level != &#34;user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>&#34; {`, result: `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {`,
status: EscapeStatus{Escaped: true, HasInvisible: true}, status: EscapeStatus{Escaped: true, HasInvisible: true},
}, },
{ {
@@ -123,7 +123,7 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
result: `Many computer programs fail to display bidirectional text correctly. result: `Many computer programs fail to display bidirectional text correctly.
For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" +
`sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` +
"\n" + `if access_level != &#34;user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>&#34; {` + "\n", "\n" + `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {` + "\n",
status: EscapeStatus{Escaped: true, HasInvisible: true}, status: EscapeStatus{Escaped: true, HasInvisible: true},
}, },
{ {
@@ -134,38 +134,22 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
result: "\xef\xbb\xbftest", result: "\xef\xbb\xbftest",
status: EscapeStatus{}, status: EscapeStatus{},
}, },
{
name: "ambiguous",
text: "O𝐾",
result: `O<span class="ambiguous-code-point" data-tooltip-content="repo.ambiguous_character:𝐾 [U+1D43E],K [U+004B]"><span class="char">𝐾</span></span>`,
status: EscapeStatus{Escaped: true, HasAmbiguous: true},
},
} }
func TestEscapeControlReader(t *testing.T) { func TestEscapeControlReader(t *testing.T) {
// add some control characters to the tests for _, tt := range escapeControlTests {
tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
copy(tests, escapeControlTests)
// if there is a BOM, we should keep the BOM
addPrefix := func(prefix, s string) string {
if strings.HasPrefix(s, "\xef\xbb\xbf") {
return s[:3] + prefix + s[3:]
}
return prefix + s
}
for _, test := range escapeControlTests {
test.name += " (+Control)"
test.text = addPrefix("\u001E", test.text)
test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">`+"\u001e"+`</span></span>`, test.result)
test.status.Escaped = true
test.status.HasInvisible = true
tests = append(tests, test)
}
re := regexp.MustCompile(`repo.ambiguous_character:\d+,\d+`) // simplify the output for the tests, remove the translation variants
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
output := &strings.Builder{} output := &strings.Builder{}
status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{}) status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{})
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, tt.status, *status) assert.Equal(t, tt.status, *status)
outStr := output.String() outStr := output.String()
outStr = re.ReplaceAllString(outStr, "repo.ambiguous_character")
assert.Equal(t, tt.result, outStr) assert.Equal(t, tt.result, outStr)
}) })
} }
@@ -179,3 +163,50 @@ func TestSettingAmbiguousUnicodeDetection(t *testing.T) {
_, out = EscapeControlHTML("a test", &translation.MockLocale{}) _, out = EscapeControlHTML("a test", &translation.MockLocale{})
assert.EqualValues(t, `a test`, out) assert.EqualValues(t, `a test`, out)
} }
// TestHTMLChunkReader verifies how htmlChunkReader.readRunes splits its input
// into tag and non-tag parts when the read buffer is created with different
// capacities, including inputs that contain multi-byte UTF-8 runes.
func TestHTMLChunkReader(t *testing.T) {
	// textPart is one part returned by readRunes paired with its "is tag" flag.
	type textPart struct {
		text  string
		isTag bool
	}

	// chunkCase is one scenario: a buffer capacity, an input and the expected parts.
	type chunkCase struct {
		bufCap int
		src    string
		want   []textPart
	}

	// checkChunks drains a reader whose buffer capacity is bufCap and compares
	// everything it produced against want.
	checkChunks := func(t *testing.T, bufCap int, src string, want []textPart) {
		reader := &htmlChunkReader{
			in:      strings.NewReader(src),
			readBuf: make([]byte, 0, bufCap),
		}
		var got []textPart
		// Reading stops at the first error of any kind reported by readRunes.
		for parts, tagFlags, err := reader.readRunes(); err == nil; parts, tagFlags, err = reader.readRunes() {
			for idx, piece := range parts {
				got = append(got, textPart{text: string(piece), isTag: tagFlags[idx]})
			}
		}
		assert.Equal(t, want, got, "chunk size: %d, input: %s", bufCap, src)
	}

	asciiCases := []chunkCase{
		{
			bufCap: 10,
			src:    "abc<def>ghi",
			want: []textPart{
				{text: "abc", isTag: false},
				{text: "<def>", isTag: true},
				{text: "gh", isTag: false},
				// -- chunk
				{text: "i", isTag: false},
			},
		},
		{
			bufCap: 10,
			src:    "<abc><def>ghi",
			want: []textPart{
				{text: "<abc>", isTag: true},
				{text: "<def>", isTag: true},
				// -- chunk
				{text: "ghi", isTag: false},
			},
		},
	}
	for _, tc := range asciiCases {
		checkChunks(t, tc.bufCap, tc.src, tc.want)
	}

	// Sample runes whose UTF-8 encodings are 1, 2, 3 and 4 bytes long; the
	// byte lengths are asserted first so the expectations below stay meaningful.
	rune1, rune2, rune3, rune4 := "A", "é", "啊", "🌞"
	for _, sample := range []struct {
		s       string
		byteLen int
	}{
		{rune1, 1},
		{rune2, 2},
		{rune3, 3},
		{rune4, 4},
	} {
		require.Len(t, sample.s, sample.byteLen)
	}

	allRunes := rune1 + rune2 + rune3 + rune4
	input := "<" + allRunes + ">" + allRunes
	multiByteCases := []chunkCase{
		{bufCap: 4, src: input, want: []textPart{
			{text: "<Aé", isTag: true}, {text: "啊", isTag: true}, {text: "🌞", isTag: true}, {text: ">", isTag: true},
			{text: "Aé", isTag: false}, {text: "啊", isTag: false}, {text: "🌞", isTag: false},
		}},
		{bufCap: 5, src: input, want: []textPart{
			{text: "<Aé", isTag: true}, {text: "啊", isTag: true}, {text: "🌞>", isTag: true},
			{text: "Aé", isTag: false}, {text: "啊", isTag: false}, {text: "🌞", isTag: false},
		}},
		{bufCap: 6, src: input, want: []textPart{
			{text: "<Aé", isTag: true}, {text: "啊", isTag: true}, {text: "🌞>", isTag: true},
			{text: "A", isTag: false}, {text: "é啊", isTag: false}, {text: "🌞", isTag: false},
		}},
		{bufCap: 7, src: input, want: []textPart{
			{text: "<Aé啊", isTag: true}, {text: "🌞>", isTag: true},
			{text: "A", isTag: false}, {text: "é啊", isTag: false}, {text: "🌞", isTag: false},
		}},
	}
	for _, tc := range multiByteCases {
		checkChunks(t, tc.bufCap, tc.src, tc.want)
	}
}
@@ -5,37 +5,86 @@ package main
import ( import (
"bytes" "bytes"
"flag"
"fmt" "fmt"
"go/format" "go/format"
"log"
"os" "os"
"sort"
"text/template" "text/template"
"unicode"
"code.gitea.io/gitea/modules/json"
"golang.org/x/text/unicode/rangetable" "golang.org/x/text/unicode/rangetable"
) )
// ambiguous.json provides a one-to-one mapping of ambiguous characters to other characters.
// See https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json

// AmbiguousTable holds the confusable runes of one locale together with the
// runes they can be confused with. The generator fills it from one entry of
// ambiguous.json and renders it into the generated Go source.
type AmbiguousTable struct {
// Confusable lists the confusable runes; the generator appends them in ascending order.
Confusable []rune
// With is parallel to Confusable: With[i] is the rune paired with Confusable[i].
With []rune
// Locale is the key under which this entry appears in ambiguous.json.
Locale string
// RangeTable is built from Confusable with rangetable.New.
RangeTable *unicode.RangeTable
}
// RunePair couples one confusable rune with the rune it is paired with.
// The generator uses it as a temporary record so that pairs can be sorted by
// Confusable before being split into the parallel slices of an AmbiguousTable.
type RunePair struct {
// Confusable is the first value of a pair read from the JSON data.
Confusable rune
// With is the second value of the same pair.
With rune
}
// InvisibleRunes these are runes that vscode has assigned to be invisible // InvisibleRunes these are runes that vscode has assigned to be invisible
// See https://github.com/hediet/vscode-unicode-data // See https://github.com/hediet/vscode-unicode-data
var InvisibleRunes = []rune{ var InvisibleRunes = []rune{
9, 10, 11, 12, 13, 32, 127, 160, 173, 847, 1564, 4447, 4448, 6068, 6069, 6155, 6156, 6157, 6158, 7355, 7356, 8192, 8193, 8194, 8195, 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8203, 8204, 8205, 8206, 8207, 8234, 8235, 8236, 8237, 8238, 8239, 8287, 8288, 8289, 8290, 8291, 8292, 8293, 8294, 8295, 8296, 8297, 8298, 8299, 8300, 8301, 8302, 8303, 10240, 12288, 12644, 65024, 65025, 65026, 65027, 65028, 65029, 65030, 65031, 65032, 65033, 65034, 65035, 65036, 65037, 65038, 65039, 65279, 65440, 65520, 65521, 65522, 65523, 65524, 65525, 65526, 65527, 65528, 65532, 78844, 119155, 119156, 119157, 119158, 119159, 119160, 119161, 119162, 917504, 917505, 917506, 917507, 917508, 917509, 917510, 917511, 917512, 917513, 917514, 917515, 917516, 917517, 917518, 917519, 917520, 917521, 917522, 917523, 917524, 917525, 917526, 917527, 917528, 917529, 917530, 917531, 917532, 917533, 917534, 917535, 917536, 917537, 917538, 917539, 917540, 917541, 917542, 917543, 917544, 917545, 917546, 917547, 917548, 917549, 917550, 917551, 917552, 917553, 917554, 917555, 917556, 917557, 917558, 917559, 917560, 917561, 917562, 917563, 917564, 917565, 917566, 917567, 917568, 917569, 917570, 917571, 917572, 917573, 917574, 917575, 917576, 917577, 917578, 917579, 917580, 917581, 917582, 917583, 917584, 917585, 917586, 917587, 917588, 917589, 917590, 917591, 917592, 917593, 917594, 917595, 917596, 917597, 917598, 917599, 917600, 917601, 917602, 917603, 917604, 917605, 917606, 917607, 917608, 917609, 917610, 917611, 917612, 917613, 917614, 917615, 917616, 917617, 917618, 917619, 917620, 917621, 917622, 917623, 917624, 917625, 917626, 917627, 917628, 917629, 917630, 917631, 917760, 917761, 917762, 917763, 917764, 917765, 917766, 917767, 917768, 917769, 917770, 917771, 917772, 917773, 917774, 917775, 917776, 917777, 917778, 917779, 917780, 917781, 917782, 917783, 917784, 917785, 917786, 917787, 917788, 917789, 917790, 917791, 917792, 917793, 917794, 917795, 917796, 917797, 917798, 917799, 917800, 917801, 917802, 
917803, 917804, 917805, 917806, 917807, 917808, 917809, 917810, 917811, 917812, 917813, 917814, 917815, 917816, 917817, 917818, 917819, 917820, 917821, 917822, 917823, 917824, 917825, 917826, 917827, 917828, 917829, 917830, 917831, 917832, 917833, 917834, 917835, 917836, 917837, 917838, 917839, 917840, 917841, 917842, 917843, 917844, 917845, 917846, 917847, 917848, 917849, 917850, 917851, 917852, 917853, 917854, 917855, 917856, 917857, 917858, 917859, 917860, 917861, 917862, 917863, 917864, 917865, 917866, 917867, 917868, 917869, 917870, 917871, 917872, 917873, 917874, 917875, 917876, 917877, 917878, 917879, 917880, 917881, 917882, 917883, 917884, 917885, 917886, 917887, 917888, 917889, 917890, 917891, 917892, 917893, 917894, 917895, 917896, 917897, 917898, 917899, 917900, 917901, 917902, 917903, 917904, 917905, 917906, 917907, 917908, 917909, 917910, 917911, 917912, 917913, 917914, 917915, 917916, 917917, 917918, 917919, 917920, 917921, 917922, 917923, 917924, 917925, 917926, 917927, 917928, 917929, 917930, 917931, 917932, 917933, 917934, 917935, 917936, 917937, 917938, 917939, 917940, 917941, 917942, 917943, 917944, 917945, 917946, 917947, 917948, 917949, 917950, 917951, 917952, 917953, 917954, 917955, 917956, 917957, 917958, 917959, 917960, 917961, 917962, 917963, 917964, 917965, 917966, 917967, 917968, 917969, 917970, 917971, 917972, 917973, 917974, 917975, 917976, 917977, 917978, 917979, 917980, 917981, 917982, 917983, 917984, 917985, 917986, 917987, 917988, 917989, 917990, 917991, 917992, 917993, 917994, 917995, 917996, 917997, 917998, 917999, 9, 10, 11, 12, 13, 32, 127, 160, 173, 847, 1564, 4447, 4448, 6068, 6069, 6155, 6156, 6157, 6158, 7355, 7356, 8192, 8193, 8194, 8195, 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8203, 8204, 8205, 8206, 8207, 8234, 8235, 8236, 8237, 8238, 8239, 8287, 8288, 8289, 8290, 8291, 8292, 8293, 8294, 8295, 8296, 8297, 8298, 8299, 8300, 8301, 8302, 8303, 10240, 12288, 12644, 65024, 65025, 65026, 65027, 65028, 65029, 65030, 65031, 
65032, 65033, 65034, 65035, 65036, 65037, 65038, 65039, 65279, 65440, 65520, 65521, 65522, 65523, 65524, 65525, 65526, 65527, 65528, 65532, 78844, 119155, 119156, 119157, 119158, 119159, 119160, 119161, 119162, 917504, 917505, 917506, 917507, 917508, 917509, 917510, 917511, 917512, 917513, 917514, 917515, 917516, 917517, 917518, 917519, 917520, 917521, 917522, 917523, 917524, 917525, 917526, 917527, 917528, 917529, 917530, 917531, 917532, 917533, 917534, 917535, 917536, 917537, 917538, 917539, 917540, 917541, 917542, 917543, 917544, 917545, 917546, 917547, 917548, 917549, 917550, 917551, 917552, 917553, 917554, 917555, 917556, 917557, 917558, 917559, 917560, 917561, 917562, 917563, 917564, 917565, 917566, 917567, 917568, 917569, 917570, 917571, 917572, 917573, 917574, 917575, 917576, 917577, 917578, 917579, 917580, 917581, 917582, 917583, 917584, 917585, 917586, 917587, 917588, 917589, 917590, 917591, 917592, 917593, 917594, 917595, 917596, 917597, 917598, 917599, 917600, 917601, 917602, 917603, 917604, 917605, 917606, 917607, 917608, 917609, 917610, 917611, 917612, 917613, 917614, 917615, 917616, 917617, 917618, 917619, 917620, 917621, 917622, 917623, 917624, 917625, 917626, 917627, 917628, 917629, 917630, 917631, 917760, 917761, 917762, 917763, 917764, 917765, 917766, 917767, 917768, 917769, 917770, 917771, 917772, 917773, 917774, 917775, 917776, 917777, 917778, 917779, 917780, 917781, 917782, 917783, 917784, 917785, 917786, 917787, 917788, 917789, 917790, 917791, 917792, 917793, 917794, 917795, 917796, 917797, 917798, 917799, 917800, 917801, 917802, 917803, 917804, 917805, 917806, 917807, 917808, 917809, 917810, 917811, 917812, 917813, 917814, 917815, 917816, 917817, 917818, 917819, 917820, 917821, 917822, 917823, 917824, 917825, 917826, 917827, 917828, 917829, 917830, 917831, 917832, 917833, 917834, 917835, 917836, 917837, 917838, 917839, 917840, 917841, 917842, 917843, 917844, 917845, 917846, 917847, 917848, 917849, 917850, 917851, 917852, 917853, 917854, 
917855, 917856, 917857, 917858, 917859, 917860, 917861, 917862, 917863, 917864, 917865, 917866, 917867, 917868, 917869, 917870, 917871, 917872, 917873, 917874, 917875, 917876, 917877, 917878, 917879, 917880, 917881, 917882, 917883, 917884, 917885, 917886, 917887, 917888, 917889, 917890, 917891, 917892, 917893, 917894, 917895, 917896, 917897, 917898, 917899, 917900, 917901, 917902, 917903, 917904, 917905, 917906, 917907, 917908, 917909, 917910, 917911, 917912, 917913, 917914, 917915, 917916, 917917, 917918, 917919, 917920, 917921, 917922, 917923, 917924, 917925, 917926, 917927, 917928, 917929, 917930, 917931, 917932, 917933, 917934, 917935, 917936, 917937, 917938, 917939, 917940, 917941, 917942, 917943, 917944, 917945, 917946, 917947, 917948, 917949, 917950, 917951, 917952, 917953, 917954, 917955, 917956, 917957, 917958, 917959, 917960, 917961, 917962, 917963, 917964, 917965, 917966, 917967, 917968, 917969, 917970, 917971, 917972, 917973, 917974, 917975, 917976, 917977, 917978, 917979, 917980, 917981, 917982, 917983, 917984, 917985, 917986, 917987, 917988, 917989, 917990, 917991, 917992, 917993, 917994, 917995, 917996, 917997, 917998, 917999,
} }
var verbose bool func generateAmbiguous() {
bs, err := os.ReadFile("ambiguous.json")
func main() { if err != nil {
flag.Usage = func() { log.Fatalf("Unable to read, err: %v", err)
fmt.Fprintf(os.Stderr, `%s: Generate InvisibleRunesRange
Usage: %[1]s [-v] [-o output.go]
`, os.Args[0])
flag.PrintDefaults()
} }
output := "" var unwrapped string
flag.BoolVar(&verbose, "v", false, "verbose output") if err := json.Unmarshal(bs, &unwrapped); err != nil {
flag.StringVar(&output, "o", "invisible_gen.go", "file to output to") log.Fatalf("Unable to unwrap content in, err: %v", err)
flag.Parse() }
fromJSON := map[string][]uint32{}
if err := json.Unmarshal([]byte(unwrapped), &fromJSON); err != nil {
log.Fatalf("Unable to unmarshal content in, err: %v", err)
}
tables := make([]*AmbiguousTable, 0, len(fromJSON))
for locale, chars := range fromJSON {
table := &AmbiguousTable{Locale: locale}
table.Confusable = make([]rune, 0, len(chars)/2)
table.With = make([]rune, 0, len(chars)/2)
pairs := make([]RunePair, len(chars)/2)
for i := 0; i < len(chars); i += 2 {
pairs[i/2].Confusable, pairs[i/2].With = rune(chars[i]), rune(chars[i+1])
}
sort.Slice(pairs, func(i, j int) bool {
return pairs[i].Confusable < pairs[j].Confusable
})
for _, pair := range pairs {
table.Confusable = append(table.Confusable, pair.Confusable)
table.With = append(table.With, pair.With)
}
table.RangeTable = rangetable.New(table.Confusable...)
tables = append(tables, table)
}
sort.Slice(tables, func(i, j int) bool {
return tables[i].Locale < tables[j].Locale
})
data := map[string]any{"Tables": tables}
if err := runTemplate(templateAmbiguous, "../ambiguous_gen.go", &data); err != nil {
log.Fatalf("Unable to run template: %v", err)
}
}
func generateInvisible() {
// First we filter the runes to remove // First we filter the runes to remove
// <space><tab><newline> // <space><tab><newline>
filtered := make([]rune, 0, len(InvisibleRunes)) filtered := make([]rune, 0, len(InvisibleRunes))
@@ -47,8 +96,8 @@ Usage: %[1]s [-v] [-o output.go]
} }
table := rangetable.New(filtered...) table := rangetable.New(filtered...)
if err := runTemplate(generatorTemplate, output, table); err != nil { if err := runTemplate(generatorInvisible, "../invisible_gen.go", table); err != nil {
fatalf("Unable to run template: %v", err) log.Fatalf("Unable to run template: %v", err)
} }
} }
@@ -59,7 +108,7 @@ func runTemplate(t *template.Template, filename string, data any) error {
} }
bs, err := format.Source(buf.Bytes()) bs, err := format.Source(buf.Bytes())
if err != nil { if err != nil {
verbosef("Bad source:\n%s", buf.String()) log.Printf("Bad source:\n%s", buf.String())
return fmt.Errorf("unable to format source: %w", err) return fmt.Errorf("unable to format source: %w", err)
} }
@@ -85,37 +134,68 @@ func runTemplate(t *template.Template, filename string, data any) error {
return nil return nil
} }
var generatorTemplate = template.Must(template.New("invisibleTemplate").Parse(`// This file is generated by modules/charset/invisible/generate.go DO NOT EDIT func main() {
// Copyright 2022 The Gitea Authors. All rights reserved. generateAmbiguous()
// SPDX-License-Identifier: MIT generateInvisible()
}
var templateAmbiguous = template.Must(template.New("ambiguousTemplate").Parse(`// This file is generated by modules/charset/generate/generate.go DO NOT EDIT
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package charset package charset
import "unicode" import "unicode"
var InvisibleRanges = &unicode.RangeTable{ // This file is generated from https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json
R16: []unicode.Range16{
{{range .R16 }} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}}, // AmbiguousTable matches a confusable rune with its partner for the Locale
{{end}} }, type AmbiguousTable struct {
R32: []unicode.Range32{ Confusable []rune
{{range .R32}} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}}, With []rune
{{end}} }, Locale string
LatinOffset: {{.LatinOffset}}, RangeTable *unicode.RangeTable
}
func newAmbiguousTableMap() map[string]*AmbiguousTable {
return map[string]*AmbiguousTable {
{{- range .Tables}}
{{printf "%q" .Locale}}: {
Confusable: []rune{ {{range .Confusable}}{{.}},{{end}} },
With: []rune{ {{range .With}}{{.}},{{end}} },
Locale: {{printf "%q" .Locale}},
RangeTable: &unicode.RangeTable{
R16: []unicode.Range16{
{{range .RangeTable.R16 }} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
{{end}} },
R32: []unicode.Range32{
{{range .RangeTable.R32}} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
{{end}} },
LatinOffset: {{.RangeTable.LatinOffset}},
},
},
{{end}}
}
} }
`)) `))
func logf(format string, args ...any) { var generatorInvisible = template.Must(template.New("invisibleTemplate").Parse(`// This file is generated by modules/charset/generate/generate.go DO NOT EDIT
fmt.Fprintf(os.Stderr, format+"\n", args...) // Copyright 2026 The Gitea Authors. All rights reserved.
} // SPDX-License-Identifier: MIT
func verbosef(format string, args ...any) { package charset
if verbose {
logf(format, args...) import "unicode"
func newInvisibleRangeTable() *unicode.RangeTable {
return &unicode.RangeTable{
R16: []unicode.Range16{
{{range .R16 }} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
{{end}}},
R32: []unicode.Range32{
{{range .R32}} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
{{end}}},
LatinOffset: {{.LatinOffset}},
} }
} }
`))
// fatalf reports a formatted message prefixed with "fatal: " through logf and
// then terminates the process with exit status 1.
func fatalf(format string, args ...any) {
	// logf appends the trailing newline itself, so adding another one here
	// would leave a stray blank line after every fatal message.
	logf("fatal: "+format, args...)
	os.Exit(1)
}
-200
View File
@@ -1,200 +0,0 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package charset
import (
"fmt"
"io"
"golang.org/x/net/html"
)
// HTMLStreamer represents a SAX-like interface for HTML.
// StreamHTML drives an implementation with one call per token it reads, and
// stops as soon as any of those calls returns a non-nil error.
type HTMLStreamer interface {
// Error hands err to the streamer and returns the streamer's resulting error.
Error(err error) error
// Doctype receives the data of a doctype token.
Doctype(data string) error
// Comment receives the data of a comment token.
Comment(data string) error
// StartTag receives a start tag's name and its attributes.
StartTag(data string, attrs ...html.Attribute) error
// SelfClosingTag receives a self-closing tag's name and its attributes.
SelfClosingTag(data string, attrs ...html.Attribute) error
// EndTag receives an end tag's name.
EndTag(data string) error
// Text receives the data of a text token.
Text(data string) error
}
// PassthroughHTMLStreamer is an HTMLStreamer that relays every event,
// unchanged, to the streamer it wraps.
type PassthroughHTMLStreamer struct {
	next HTMLStreamer
}

// NewPassthroughStreamer returns a PassthroughHTMLStreamer relaying to next.
func NewPassthroughStreamer(next HTMLStreamer) *PassthroughHTMLStreamer {
	return &PassthroughHTMLStreamer{next: next}
}

// Compile-time check that *PassthroughHTMLStreamer satisfies HTMLStreamer.
var _ HTMLStreamer = (*PassthroughHTMLStreamer)(nil)

// Error relays err to the wrapped streamer and returns its answer.
func (s *PassthroughHTMLStreamer) Error(err error) error {
	return s.next.Error(err)
}

// Doctype relays the doctype data to the wrapped streamer.
func (s *PassthroughHTMLStreamer) Doctype(data string) error {
	return s.next.Doctype(data)
}

// Comment relays the comment data to the wrapped streamer.
func (s *PassthroughHTMLStreamer) Comment(data string) error {
	return s.next.Comment(data)
}

// StartTag relays the start tag and its attributes to the wrapped streamer.
func (s *PassthroughHTMLStreamer) StartTag(data string, attrs ...html.Attribute) error {
	return s.next.StartTag(data, attrs...)
}

// SelfClosingTag relays the self-closing tag and its attributes to the wrapped streamer.
func (s *PassthroughHTMLStreamer) SelfClosingTag(data string, attrs ...html.Attribute) error {
	return s.next.SelfClosingTag(data, attrs...)
}

// EndTag relays the end tag to the wrapped streamer.
func (s *PassthroughHTMLStreamer) EndTag(data string) error {
	return s.next.EndTag(data)
}

// Text relays the text data to the wrapped streamer.
func (s *PassthroughHTMLStreamer) Text(data string) error {
	return s.next.Text(data)
}
// HTMLStreamerWriter acts as a writing sink: it wraps an io.Writer and keeps
// a sticky error in err. While err is set, Write and WriteString write
// nothing and return that error instead.
type HTMLStreamerWriter struct {
	io.Writer
	err error
}

// Write implements io.Writer. It returns the sticky error without writing if
// one is set; otherwise it forwards data to the wrapped writer.
func (h *HTMLStreamerWriter) Write(data []byte) (int, error) {
	if h.err != nil {
		return 0, h.err
	}
	return h.Writer.Write(data)
}

// WriteString implements io.StringWriter. It returns the sticky error without
// writing if one is set; otherwise it forwards data to the wrapped writer.
func (h *HTMLStreamerWriter) WriteString(data string) (int, error) {
	if h.err != nil {
		return 0, h.err
	}
	// io.WriteString uses the wrapped writer's own WriteString when it has
	// one, and only falls back to a []byte conversion otherwise.
	return io.WriteString(h.Writer, data)
}

// Error stores err as the sticky error unless one is already stored, and
// returns the sticky error (which may be an earlier one, or nil).
func (h *HTMLStreamerWriter) Error(err error) error {
	if h.err == nil {
		h.err = err
	}
	return h.err
}
// Doctype writes data wrapped as a "<!DOCTYPE ...>" declaration, stores the
// write's error in h.err and returns it.
func (h *HTMLStreamerWriter) Doctype(data string) error {
	_, err := h.WriteString("<!DOCTYPE " + data + ">")
	h.err = err
	return err
}
// Comment writes data wrapped as an HTML comment ("<!--" ... "-->"), stores
// the write's error in h.err and returns it.
func (h *HTMLStreamerWriter) Comment(data string) error {
	_, err := h.WriteString("<!--" + data + "-->")
	h.err = err
	return err
}
// StartTag writes an opening tag named data with the given attributes,
// terminated by ">".
func (h *HTMLStreamerWriter) StartTag(data string, attrs ...html.Attribute) error {
	return h.startTag(data, attrs, false)
}

// SelfClosingTag writes a tag named data with the given attributes,
// terminated by "/>".
func (h *HTMLStreamerWriter) SelfClosingTag(data string, attrs ...html.Attribute) error {
	return h.startTag(data, attrs, true)
}

// startTag writes "<" + data, then each attribute as ` key="value"` with the
// value passed through html.EscapeString, then "/>" when selfclosing is true
// or ">" otherwise. Every write's error is stored in h.err, and the first
// non-nil one stops the output and is returned.
func (h *HTMLStreamerWriter) startTag(data string, attrs []html.Attribute, selfclosing bool) error {
	_, h.err = h.WriteString("<" + data)
	if h.err != nil {
		return h.err
	}

	for _, attr := range attrs {
		pair := " " + attr.Key + "=\"" + html.EscapeString(attr.Val) + "\""
		_, h.err = h.WriteString(pair)
		if h.err != nil {
			return h.err
		}
	}

	closer := ">"
	if selfclosing {
		closer = "/>"
	}
	_, h.err = h.WriteString(closer)
	return h.err
}
// EndTag writes the closing tag "</" + data + ">", stores the write's error
// in h.err and returns it.
func (h *HTMLStreamerWriter) EndTag(data string) error {
	_, err := h.WriteString("</" + data + ">")
	h.err = err
	return err
}
// Text writes data after passing it through html.EscapeString, stores the
// write's error in h.err and returns it.
func (h *HTMLStreamerWriter) Text(data string) error {
	escaped := html.EscapeString(data)
	_, h.err = h.WriteString(escaped)
	return h.err
}
// StreamHTML tokenizes the HTML read from source and reports each token to
// streamer by calling the HTMLStreamer method that matches the token type.
//
// It returns nil once the tokenizer reaches io.EOF. Any other tokenizer
// error, the first non-nil error returned by streamer, or an unrecognised
// token type ends the stream and is returned.
func StreamHTML(source io.Reader, streamer HTMLStreamer) error {
	tokenizer := html.NewTokenizer(source)
	for {
		tt := tokenizer.Next()
		switch tt {
		case html.ErrorToken:
			// The tokenizer signals a clean end of input with io.EOF.
			if err := tokenizer.Err(); err != io.EOF {
				return err
			}
			return nil
		case html.DoctypeToken:
			token := tokenizer.Token()
			if err := streamer.Doctype(token.Data); err != nil {
				return err
			}
		case html.CommentToken:
			token := tokenizer.Token()
			if err := streamer.Comment(token.Data); err != nil {
				return err
			}
		case html.StartTagToken:
			token := tokenizer.Token()
			if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
				return err
			}
		case html.SelfClosingTagToken:
			token := tokenizer.Token()
			// Self-closing tags go to SelfClosingTag, not StartTag, so the
			// streamer can tell "<br/>" apart from "<br>".
			if err := streamer.SelfClosingTag(token.Data, token.Attr...); err != nil {
				return err
			}
		case html.EndTagToken:
			token := tokenizer.Token()
			if err := streamer.EndTag(token.Data); err != nil {
				return err
			}
		case html.TextToken:
			token := tokenizer.Token()
			if err := streamer.Text(token.Data); err != nil {
				return err
			}
		default:
			return fmt.Errorf("unknown type of token: %d", tt)
		}
	}
}
+31 -29
View File
@@ -1,36 +1,38 @@
// This file is generated by modules/charset/invisible/generate.go DO NOT EDIT // This file is generated by modules/charset/generate/generate.go DO NOT EDIT
// Copyright 2022 The Gitea Authors. All rights reserved. // Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
package charset package charset
import "unicode" import "unicode"
var InvisibleRanges = &unicode.RangeTable{ func newInvisibleRangeTable() *unicode.RangeTable {
R16: []unicode.Range16{ return &unicode.RangeTable{
{Lo: 11, Hi: 13, Stride: 1}, R16: []unicode.Range16{
{Lo: 127, Hi: 160, Stride: 33}, {Lo: 11, Hi: 13, Stride: 1},
{Lo: 173, Hi: 847, Stride: 674}, {Lo: 127, Hi: 160, Stride: 33},
{Lo: 1564, Hi: 4447, Stride: 2883}, {Lo: 173, Hi: 847, Stride: 674},
{Lo: 4448, Hi: 6068, Stride: 1620}, {Lo: 1564, Hi: 4447, Stride: 2883},
{Lo: 6069, Hi: 6155, Stride: 86}, {Lo: 4448, Hi: 6068, Stride: 1620},
{Lo: 6156, Hi: 6158, Stride: 1}, {Lo: 6069, Hi: 6155, Stride: 86},
{Lo: 7355, Hi: 7356, Stride: 1}, {Lo: 6156, Hi: 6158, Stride: 1},
{Lo: 8192, Hi: 8207, Stride: 1}, {Lo: 7355, Hi: 7356, Stride: 1},
{Lo: 8234, Hi: 8239, Stride: 1}, {Lo: 8192, Hi: 8207, Stride: 1},
{Lo: 8287, Hi: 8303, Stride: 1}, {Lo: 8234, Hi: 8239, Stride: 1},
{Lo: 10240, Hi: 12288, Stride: 2048}, {Lo: 8287, Hi: 8303, Stride: 1},
{Lo: 12644, Hi: 65024, Stride: 52380}, {Lo: 10240, Hi: 12288, Stride: 2048},
{Lo: 65025, Hi: 65039, Stride: 1}, {Lo: 12644, Hi: 65024, Stride: 52380},
{Lo: 65279, Hi: 65440, Stride: 161}, {Lo: 65025, Hi: 65039, Stride: 1},
{Lo: 65520, Hi: 65528, Stride: 1}, {Lo: 65279, Hi: 65440, Stride: 161},
{Lo: 65532, Hi: 65532, Stride: 1}, {Lo: 65520, Hi: 65528, Stride: 1},
}, {Lo: 65532, Hi: 65532, Stride: 1},
R32: []unicode.Range32{ },
{Lo: 78844, Hi: 119155, Stride: 40311}, R32: []unicode.Range32{
{Lo: 119156, Hi: 119162, Stride: 1}, {Lo: 78844, Hi: 119155, Stride: 40311},
{Lo: 917504, Hi: 917631, Stride: 1}, {Lo: 119156, Hi: 119162, Stride: 1},
{Lo: 917760, Hi: 917999, Stride: 1}, {Lo: 917504, Hi: 917631, Stride: 1},
}, {Lo: 917760, Hi: 917999, Stride: 1},
LatinOffset: 2, },
LatinOffset: 2,
}
} }
+17 -80
View File
@@ -6,8 +6,8 @@ package highlight
import ( import (
"bytes" "bytes"
gohtml "html"
"html/template" "html/template"
"slices"
"sync" "sync"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
@@ -15,7 +15,7 @@ import (
"code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/util"
"github.com/alecthomas/chroma/v2" "github.com/alecthomas/chroma/v2"
"github.com/alecthomas/chroma/v2/formatters/html" chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
"github.com/alecthomas/chroma/v2/styles" "github.com/alecthomas/chroma/v2/styles"
) )
@@ -25,8 +25,6 @@ const sizeLimit = 1024 * 1024
type globalVarsType struct { type globalVarsType struct {
highlightMapping map[string]string highlightMapping map[string]string
githubStyles *chroma.Style githubStyles *chroma.Style
escapeFull []template.HTML
escCtrlCharsMap []template.HTML
} }
var ( var (
@@ -42,69 +40,10 @@ func globalVars() *globalVarsType {
globalVarsPtr = &globalVarsType{} globalVarsPtr = &globalVarsType{}
globalVarsPtr.githubStyles = styles.Get("github") globalVarsPtr.githubStyles = styles.Get("github")
globalVarsPtr.highlightMapping = setting.GetHighlightMapping() globalVarsPtr.highlightMapping = setting.GetHighlightMapping()
globalVarsPtr.escCtrlCharsMap = make([]template.HTML, 256)
// ASCII Table 0x00 - 0x1F
controlCharNames := []string{
"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
}
// Uncomment this line if you'd debug the layout without creating a special file, then Space (0x20) will also be escaped.
// Don't worry, even if you forget to comment it out and push it to git repo, the CI tests will catch it and fail.
// controlCharNames = append(controlCharNames, "SP")
for i, s := range controlCharNames {
globalVarsPtr.escCtrlCharsMap[i] = template.HTML(`<span class="broken-code-point" data-escaped="` + s + `"><span class="char">` + string(byte(i)) + `</span></span>`)
}
globalVarsPtr.escCtrlCharsMap[0x7f] = template.HTML(`<span class="broken-code-point" data-escaped="DEL"><span class="char">` + string(byte(0x7f)) + `</span></span>`)
globalVarsPtr.escCtrlCharsMap['\t'] = ""
globalVarsPtr.escCtrlCharsMap['\n'] = ""
globalVarsPtr.escCtrlCharsMap['\r'] = ""
globalVarsPtr.escapeFull = slices.Clone(globalVarsPtr.escCtrlCharsMap)
// exactly the same as Golang's html.EscapeString
globalVarsPtr.escapeFull['&'] = "&amp;"
globalVarsPtr.escapeFull['\''] = "&#39;"
globalVarsPtr.escapeFull['<'] = "&lt;"
globalVarsPtr.escapeFull['>'] = "&gt;"
globalVarsPtr.escapeFull['"'] = "&#34;"
} }
return globalVarsPtr return globalVarsPtr
} }
// escapeByMap returns code with every byte that has a non-empty entry in
// escapeMap (indexed by byte value) replaced by that entry. Bytes whose entry
// is empty are kept as they are. When no byte has a replacement, code itself
// is converted with util.UnsafeBytesToString and returned.
func escapeByMap(code []byte, escapeMap []template.HTML) template.HTML {
	// Skip the leading run of bytes that need no replacement.
	first := 0
	for first < len(code) && escapeMap[code[first]] == "" {
		first++
	}
	if first == len(code) {
		return template.HTML(util.UnsafeBytesToString(code))
	}

	// Start the output with the untouched prefix, then process the rest.
	out := make([]byte, 0, len(code)*2)
	out = append(out, code[:first]...)
	for _, b := range code[first:] {
		if repl := escapeMap[b]; repl != "" {
			out = append(out, repl...)
			continue
		}
		out = append(out, b)
	}
	return template.HTML(util.UnsafeBytesToString(out))
}
func escapeFullString(code string) template.HTML {
return escapeByMap(util.UnsafeStringToBytes(code), globalVars().escapeFull)
}
func escapeControlChars(code []byte) template.HTML {
return escapeByMap(code, globalVars().escCtrlCharsMap)
}
// UnsafeSplitHighlightedLines splits highlighted code into lines preserving HTML tags // UnsafeSplitHighlightedLines splits highlighted code into lines preserving HTML tags
// It always includes '\n', '\n' can appear at the end of each line or in the middle of HTML tags // It always includes '\n', '\n' can appear at the end of each line or in the middle of HTML tags
// The '\n' is necessary for copying code from web UI to preserve original code lines // The '\n' is necessary for copying code from web UI to preserve original code lines
@@ -137,6 +76,10 @@ func UnsafeSplitHighlightedLines(code template.HTML) (ret [][]byte) {
} }
} }
// htmlEscape escapes code with the standard library's html.EscapeString and
// returns the result typed as template.HTML.
func htmlEscape(code string) template.HTML {
	escaped := gohtml.EscapeString(code)
	return template.HTML(escaped)
}
// RenderCodeSlowGuess tries to get a lexer by file name and language first, // RenderCodeSlowGuess tries to get a lexer by file name and language first,
// if not found, it will try to guess the lexer by code content, which is slow (more than several hundreds of milliseconds). // if not found, it will try to guess the lexer by code content, which is slow (more than several hundreds of milliseconds).
func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML, lexer chroma.Lexer, lexerDisplayName string) { func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML, lexer chroma.Lexer, lexerDisplayName string) {
@@ -147,7 +90,7 @@ func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML,
} }
if len(code) > sizeLimit { if len(code) > sizeLimit {
return escapeFullString(code), nil, "" return htmlEscape(code), nil, ""
} }
lexer = detectChromaLexerWithAnalyze(fileName, language, util.UnsafeStringToBytes(code)) // it is also slow lexer = detectChromaLexerWithAnalyze(fileName, language, util.UnsafeStringToBytes(code)) // it is also slow
@@ -156,15 +99,15 @@ func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML,
// RenderCodeByLexer returns a HTML version of code string with chroma syntax highlighting classes // RenderCodeByLexer returns a HTML version of code string with chroma syntax highlighting classes
func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML { func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML {
formatter := html.New(html.WithClasses(true), formatter := chromahtml.New(chromahtml.WithClasses(true),
html.WithLineNumbers(false), chromahtml.WithLineNumbers(false),
html.PreventSurroundingPre(true), chromahtml.PreventSurroundingPre(true),
) )
iterator, err := lexer.Tokenise(nil, code) iterator, err := lexer.Tokenise(nil, code)
if err != nil { if err != nil {
log.Error("Can't tokenize code: %v", err) log.Error("Can't tokenize code: %v", err)
return escapeFullString(code) return htmlEscape(code)
} }
htmlBuf := &bytes.Buffer{} htmlBuf := &bytes.Buffer{}
@@ -172,14 +115,9 @@ func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML {
err = formatter.Format(htmlBuf, globalVars().githubStyles, iterator) err = formatter.Format(htmlBuf, globalVars().githubStyles, iterator)
if err != nil { if err != nil {
log.Error("Can't format code: %v", err) log.Error("Can't format code: %v", err)
return escapeFullString(code) return htmlEscape(code)
} }
return template.HTML(util.UnsafeBytesToString(htmlBuf.Bytes()))
// At the moment, we do not escape control chars here (unlike RenderFullFile which escapes control chars).
// The reason is: it is a very rare case that a text file contains control chars.
// This function is usually used by highlight diff and blame, not quite sure whether there will be side effects.
// If there would be new user feedback about this, we can re-consider about various edge cases.
return template.HTML(htmlBuf.String())
} }
// RenderFullFile returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name // RenderFullFile returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
@@ -191,10 +129,9 @@ func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, st
lexerName := formatLexerName(lexer.Config().Name) lexerName := formatLexerName(lexer.Config().Name)
rendered := RenderCodeByLexer(lexer, util.UnsafeBytesToString(code)) rendered := RenderCodeByLexer(lexer, util.UnsafeBytesToString(code))
unsafeLines := UnsafeSplitHighlightedLines(rendered) unsafeLines := UnsafeSplitHighlightedLines(rendered)
lines := make([]template.HTML, 0, len(unsafeLines)) lines := make([]template.HTML, len(unsafeLines))
for _, lineBytes := range unsafeLines { for idx, lineBytes := range unsafeLines {
line := escapeControlChars(lineBytes) lines[idx] = template.HTML(util.UnsafeBytesToString(lineBytes))
lines = append(lines, line)
} }
return lines, lexerName return lines, lexerName
} }
@@ -213,7 +150,7 @@ func renderPlainText(code []byte) []template.HTML {
content = code[pos : pos+nextPos+1] content = code[pos : pos+nextPos+1]
pos += nextPos + 1 pos += nextPos + 1
} }
lines = append(lines, escapeFullString(util.UnsafeBytesToString(content))) lines = append(lines, htmlEscape(util.UnsafeBytesToString(content)))
} }
return lines return lines
} }
-11
View File
@@ -204,14 +204,3 @@ func TestUnsafeSplitHighlightedLines(t *testing.T) {
assert.Equal(t, "<span>a</span>\n", string(ret[0])) assert.Equal(t, "<span>a</span>\n", string(ret[0]))
assert.Equal(t, "<span>b\n</span>", string(ret[1])) assert.Equal(t, "<span>b\n</span>", string(ret[1]))
} }
// TestEscape pins the output of escapeControlChars and escapeFullString, and
// the rendering of a DEL byte by RenderFullFile and renderPlainText.
func TestEscape(t *testing.T) {
// escapeControlChars: \t, \r, \n and the HTML special characters stay as they are; NUL and US get wrapped in spans.
assert.Equal(t, template.HTML("\t\r\n<span class=\"broken-code-point\" data-escaped=\"NUL\"><span class=\"char\">\x00</span></span><span class=\"broken-code-point\" data-escaped=\"US\"><span class=\"char\">\x1f</span></span>&'\"<>"), escapeControlChars([]byte("\t\r\n\x00\x1f&'\"<>")))
// escapeFullString: same span wrapping, and the HTML special characters become entities.
assert.Equal(t, template.HTML("<span class=\"broken-code-point\" data-escaped=\"NUL\"><span class=\"char\">\x00</span></span><span class=\"broken-code-point\" data-escaped=\"US\"><span class=\"char\">\x1f</span></span>&amp;&#39;&#34;&lt;&gt;\t\r\n"), escapeFullString("\x00\x1f&'\"<>\t\r\n"))

// Highlighted rendering of a Python comment containing DEL (0x7f): the DEL byte is wrapped inside the comment span.
out, _ := RenderFullFile("a.py", "", []byte("# \x7f<>"))
assert.Equal(t, template.HTML(`<span class="c1"># <span class="broken-code-point" data-escaped="DEL"><span class="char">`+string(byte(0x7f))+`</span></span>&lt;&gt;</span>`), out[0])

// Plain-text rendering of the same bytes: same DEL wrapping, without the highlight span.
out = renderPlainText([]byte("# \x7f<>"))
assert.Equal(t, template.HTML(`# <span class="broken-code-point" data-escaped="DEL"><span class="char">`+string(byte(0x7f))+`</span></span>&lt;&gt;`), out[0])
}
+1 -1
View File
@@ -74,7 +74,7 @@ func HighlightSearchResultCode(filename, language string, lineNums []int, code s
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting // we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
lexer := highlight.DetectChromaLexerByFileName(filename, language) lexer := highlight.DetectChromaLexerByFileName(filename, language)
hl := highlight.RenderCodeByLexer(lexer, code) hl := highlight.RenderCodeByLexer(lexer, code)
highlightedLines := strings.Split(string(hl), "\n") highlightedLines := highlight.UnsafeSplitHighlightedLines(hl)
// The lineNums outputted by render might not match the original lineNums, because "highlight" removes the last `\n` // The lineNums outputted by render might not match the original lineNums, because "highlight" removes the last `\n`
lines := make([]*ResultLine, min(len(highlightedLines), len(lineNums))) lines := make([]*ResultLine, min(len(highlightedLines), len(lineNums)))
+22
View File
@@ -16,6 +16,7 @@ import (
issues_model "code.gitea.io/gitea/models/issues" issues_model "code.gitea.io/gitea/models/issues"
"code.gitea.io/gitea/models/renderhelper" "code.gitea.io/gitea/models/renderhelper"
"code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/emoji" "code.gitea.io/gitea/modules/emoji"
"code.gitea.io/gitea/modules/htmlutil" "code.gitea.io/gitea/modules/htmlutil"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
@@ -277,3 +278,24 @@ func (ut *RenderUtils) RenderThemeItem(info *webtheme.ThemeMetaInfo, iconSize in
extraIcon := svg.RenderHTML(info.GetExtraIconName(), iconSize) extraIcon := svg.RenderHTML(info.GetExtraIconName(), iconSize)
return htmlutil.HTMLFormat(`<div class="theme-menu-item" data-tooltip-content="%s">%s %s %s</div>`, info.GetDescription(), icon, info.DisplayName, extraIcon) return htmlutil.HTMLFormat(`<div class="theme-menu-item" data-tooltip-content="%s">%s %s %s</div>`, info.GetDescription(), icon, info.DisplayName, extraIcon)
} }
// RenderUnicodeEscapeToggleButton renders the escape toggle button for one
// line. It returns an empty string when escapeStatus is nil or not Escaped.
// The button title is the "ambiguous runes" message when HasAmbiguous is set,
// otherwise the "invisible runes" message when HasInvisible is set, otherwise
// empty.
func (ut *RenderUtils) RenderUnicodeEscapeToggleButton(escapeStatus *charset.EscapeStatus) template.HTML {
	if escapeStatus == nil || !escapeStatus.Escaped {
		return ""
	}

	locale := ut.ctx.Value(translation.ContextKey).(translation.Locale)

	var title template.HTML
	switch {
	case escapeStatus.HasAmbiguous:
		title = locale.Tr("repo.ambiguous_runes_line")
	case escapeStatus.HasInvisible:
		title = locale.Tr("repo.invisible_runes_line")
	}
	return htmlutil.HTMLFormat(`<button type="button" class="toggle-escape-button btn interact-bg" title="%s"></button>`, title)
}
// RenderUnicodeEscapeToggleTd renders the "lines-escape" table cell for one
// line. It returns an empty string when combined is nil or not Escaped;
// otherwise the cell wraps whatever RenderUnicodeEscapeToggleButton returns
// for escapeStatus, which may be empty.
func (ut *RenderUtils) RenderUnicodeEscapeToggleTd(combined, escapeStatus *charset.EscapeStatus) template.HTML {
	if combined != nil && combined.Escaped {
		button := ut.RenderUnicodeEscapeToggleButton(escapeStatus)
		return `<td class="lines-escape">` + button + `</td>`
	}
	return ""
}
+1 -1
View File
@@ -1216,7 +1216,7 @@
"repo.ambiguous_runes_description": "This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.", "repo.ambiguous_runes_description": "This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.",
"repo.invisible_runes_line": "This line has invisible unicode characters", "repo.invisible_runes_line": "This line has invisible unicode characters",
"repo.ambiguous_runes_line": "This line has ambiguous unicode characters", "repo.ambiguous_runes_line": "This line has ambiguous unicode characters",
"repo.ambiguous_character": "%[1]c [U+%04[1]X] can be confused with %[2]c [U+%04[2]X]", "repo.ambiguous_character": "%[1]s can be confused with %[2]s",
"repo.escape_control_characters": "Escape", "repo.escape_control_characters": "Escape",
"repo.unescape_control_characters": "Unescape", "repo.unescape_control_characters": "Unescape",
"repo.file_copy_permalink": "Copy Permalink", "repo.file_copy_permalink": "Copy Permalink",
+26
View File
@@ -17,7 +17,9 @@ import (
"code.gitea.io/gitea/models/db" "code.gitea.io/gitea/models/db"
user_model "code.gitea.io/gitea/models/user" user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/badge" "code.gitea.io/gitea/modules/badge"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/templates" "code.gitea.io/gitea/modules/templates"
"code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/util"
"code.gitea.io/gitea/services/context" "code.gitea.io/gitea/services/context"
@@ -190,9 +192,33 @@ func prepareMockData(ctx *context.Context) {
prepareMockDataBadgeActionsSvg(ctx) prepareMockDataBadgeActionsSvg(ctx)
case "/devtest/relative-time": case "/devtest/relative-time":
prepareMockDataRelativeTime(ctx) prepareMockDataRelativeTime(ctx)
case "/devtest/unicode-escape":
prepareMockDataUnicodeEscape(ctx)
} }
} }
// prepareMockDataUnicodeEscape builds a small Go snippet containing invisible,
// ambiguous, broken and control characters, highlights it, escapes each
// highlighted line, and stores the lines plus their per-line and combined
// escape status in ctx.Data.
func prepareMockDataUnicodeEscape(ctx *context.Context) {
	content := "// demo code\n" +
		"if accessLevel != \"user\u202E \u2066// Check if admin (invisible char)\u2069 \u2066\" { }\n" +
		"if O𝐾 { } // ambiguous char\n" +
		"if O𝐾 && accessLevel != \"user\u202E \u2066// ambiguous char + invisible char\u2069 \u2066\" { }\n" +
		"str := `\xef` // broken char\n" +
		"str := `\x00 \x19 \x7f` // control char\n"

	lineNums := []int{1, 2, 3, 4, 5, 6, 7, 8, 9}
	highlightLines := code.HighlightSearchResultCode("demo.go", "", lineNums, content)

	combined := &charset.EscapeStatus{}
	perLine := make([]*charset.EscapeStatus, len(highlightLines))
	for idx, line := range highlightLines {
		status, escaped := charset.EscapeControlHTML(line.FormattedContent, ctx.Locale)
		perLine[idx] = status
		line.FormattedContent = escaped
		combined = combined.Or(status)
	}

	ctx.Data["HighlightLines"] = highlightLines
	ctx.Data["EscapeStatus"] = combined
	ctx.Data["LineEscapeStatus"] = perLine
}
func TmplCommon(ctx *context.Context) { func TmplCommon(ctx *context.Context) {
prepareMockData(ctx) prepareMockData(ctx)
if ctx.Req.Method == http.MethodPost { if ctx.Req.Method == http.MethodPost {
+1 -1
View File
@@ -159,7 +159,7 @@ func markupRenderToHTML(ctx *context.Context, renderCtx *markup.RenderContext, r
go func() { go func() {
sb := &strings.Builder{} sb := &strings.Builder{}
if markup.RendererNeedPostProcess(renderer) { if markup.RendererNeedPostProcess(renderer) {
escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.RuneNBSP) // We allow NBSP here this is rendered escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.EscapeOptionsForView())
} else { } else {
escaped = &charset.EscapeStatus{} escaped = &charset.EscapeStatus{}
_, _ = io.Copy(sb, markupRd) _, _ = io.Copy(sb, markupRd)
+1 -2
View File
@@ -258,8 +258,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) {
defer markupWr.Close() defer markupWr.Close()
done := make(chan struct{}) done := make(chan struct{})
go func() { go func() {
// We allow NBSP here this is rendered escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.EscapeOptionsForView())
escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.RuneNBSP)
output = template.HTML(buf.String()) output = template.HTML(buf.String())
buf.Reset() buf.Reset()
close(done) close(done)
+1 -1
View File
@@ -101,7 +101,7 @@ func renderRepoFileCodePreview(ctx context.Context, opts markup.RenderCodePrevie
escapeStatus := &charset.EscapeStatus{} escapeStatus := &charset.EscapeStatus{}
lineEscapeStatus := make([]*charset.EscapeStatus, len(highlightLines)) lineEscapeStatus := make([]*charset.EscapeStatus, len(highlightLines))
for i, hl := range highlightLines { for i, hl := range highlightLines {
lineEscapeStatus[i], hl.FormattedContent = charset.EscapeControlHTML(hl.FormattedContent, webCtx.Base.Locale, charset.RuneNBSP) lineEscapeStatus[i], hl.FormattedContent = charset.EscapeControlHTML(hl.FormattedContent, webCtx.Base.Locale, charset.EscapeOptionsForView())
escapeStatus = escapeStatus.Or(lineEscapeStatus[i]) escapeStatus = escapeStatus.Or(lineEscapeStatus[i])
} }
@@ -37,10 +37,12 @@ func TestRenderHelperCodePreview(t *testing.T) {
<table class="file-view"> <table class="file-view">
<tbody><tr> <tbody><tr>
<td class="lines-num"><span data-line-number="1"></span></td> <td class="lines-num"><span data-line-number="1"></span></td>
<td class="lines-code chroma"><div class="code-inner"><span class="gh"># repo1</div></td> <td class="lines-code chroma"><div class="code-inner"><span class="gh"># repo1
</span></div></td>
</tr><tr> </tr><tr>
<td class="lines-num"><span data-line-number="2"></span></td> <td class="lines-num"><span data-line-number="2"></span></td>
<td class="lines-code chroma"><div class="code-inner"></span></div></td> <td class="lines-code chroma"><div class="code-inner">
</div></td>
</tr></tbody> </tr></tbody>
</table> </table>
</div> </div>
@@ -64,7 +66,8 @@ func TestRenderHelperCodePreview(t *testing.T) {
<table class="file-view"> <table class="file-view">
<tbody><tr> <tbody><tr>
<td class="lines-num"><span data-line-number="1"></span></td> <td class="lines-num"><span data-line-number="1"></span></td>
<td class="lines-code chroma"><div class="code-inner"><span class="gh"># repo1</div></td> <td class="lines-code chroma"><div class="code-inner"><span class="gh"># repo1
</span></div></td>
</tr></tbody> </tr></tbody>
</table> </table>
</div> </div>
+1 -4
View File
@@ -13,10 +13,7 @@
{{- range $idx, $line := .HighlightLines -}} {{- range $idx, $line := .HighlightLines -}}
<tr> <tr>
<td class="lines-num"><span data-line-number="{{$line.Num}}"></span></td> <td class="lines-num"><span data-line-number="{{$line.Num}}"></span></td>
{{- if $.EscapeStatus.Escaped -}} {{- ctx.RenderUtils.RenderUnicodeEscapeToggleTd $.EscapeStatus (index $.LineEscapeStatus $idx)}}
{{- $lineEscapeStatus := index $.LineEscapeStatus $idx -}}
<td class="lines-escape">{{if $lineEscapeStatus.Escaped}}<a href="#" class="toggle-escape-button btn interact-bg" title="{{if $lineEscapeStatus.HasInvisible}}{{ctx.Locale.Tr "repo.invisible_runes_line"}} {{end}}{{if $lineEscapeStatus.HasAmbiguous}}{{ctx.Locale.Tr "repo.ambiguous_runes_line"}}{{end}}"></a>{{end}}</td>
{{- end}}
<td class="lines-code chroma"><div class="code-inner">{{$line.FormattedContent}}</div></td>{{/* only div works, span generates incorrect HTML structure */}} <td class="lines-code chroma"><div class="code-inner">{{$line.FormattedContent}}</div></td>{{/* only div works, span generates incorrect HTML structure */}}
</tr> </tr>
{{- end -}} {{- end -}}
+17
View File
@@ -0,0 +1,17 @@
{{template "devtest/devtest-header"}}
<div class="page-content repository">
<div class="file-content">
<table class="file-view unicode-escaped">
<tbody>
{{range $idx, $line := .HighlightLines}}
<tr>
<td class="lines-num"><span data-line-number="{{$line.Num}}"></span></td>
{{ctx.RenderUtils.RenderUnicodeEscapeToggleTd $.EscapeStatus (index $.LineEscapeStatus $idx)}}
<td class="lines-code chroma"><div class="code-inner">{{$line.FormattedContent}}</div></td>
</tr>
{{end}}
</tbody>
</table>
</div>
</div>
{{template "devtest/devtest-footer"}}
+3 -7
View File
@@ -66,13 +66,9 @@
<td class="lines-num"> <td class="lines-num">
<span id="L{{$row.RowNumber}}" data-line-number="{{$row.RowNumber}}"></span> <span id="L{{$row.RowNumber}}" data-line-number="{{$row.RowNumber}}"></span>
</td> </td>
{{if $.EscapeStatus.Escaped}}
<td class="lines-escape"> {{ctx.RenderUtils.RenderUnicodeEscapeToggleTd $.EscapeStatus $row.EscapeStatus}}
{{if $row.EscapeStatus.Escaped}}
<button class="toggle-escape-button btn interact-bg" title="{{template "repo/diff/escape_title" dict "diff" $row}}"></button>
{{end}}
</td>
{{end}}
<td rel="L{{$row.RowNumber}}" class="lines-code blame-code chroma"> <td rel="L{{$row.RowNumber}}" class="lines-code blame-code chroma">
<code class="code-inner tw-pl-2">{{$row.Code}}</code> <code class="code-inner tw-pl-2">{{$row.Code}}</code>
</td> </td>
+4 -4
View File
@@ -12,7 +12,7 @@
{{else}} {{else}}
{{$inlineDiff := $.section.GetComputedInlineDiffFor $line ctx.Locale}} {{$inlineDiff := $.section.GetComputedInlineDiffFor $line ctx.Locale}}
<td class="lines-num lines-num-old" data-line-num="{{if $line.LeftIdx}}{{$line.LeftIdx}}{{end}}"><span rel="{{if $line.LeftIdx}}diff-{{$.FileNameHash}}L{{$line.LeftIdx}}{{end}}"></span></td> <td class="lines-num lines-num-old" data-line-num="{{if $line.LeftIdx}}{{$line.LeftIdx}}{{end}}"><span rel="{{if $line.LeftIdx}}diff-{{$.FileNameHash}}L{{$line.LeftIdx}}{{end}}"></span></td>
<td class="lines-escape lines-escape-old">{{if and $line.LeftIdx $inlineDiff.EscapeStatus.Escaped}}<button class="toggle-escape-button btn interact-bg" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff}}"></button>{{end}}</td> <td class="lines-escape lines-escape-old">{{if $line.LeftIdx}}{{ctx.RenderUtils.RenderUnicodeEscapeToggleButton $inlineDiff.EscapeStatus}}{{end}}</td>
<td class="lines-type-marker lines-type-marker-old">{{if $line.LeftIdx}}<span class="tw-font-mono" data-type-marker=""></span>{{end}}</td> <td class="lines-type-marker lines-type-marker-old">{{if $line.LeftIdx}}<span class="tw-font-mono" data-type-marker=""></span>{{end}}</td>
<td class="lines-code lines-code-old"> <td class="lines-code lines-code-old">
{{/* ATTENTION: BLOB-EXCERPT-COMMENT-RIGHT: here it intentionally use "right" side to comment, because the backend code depends on the assumption that the comment only happens on right side*/}} {{/* ATTENTION: BLOB-EXCERPT-COMMENT-RIGHT: here it intentionally use "right" side to comment, because the backend code depends on the assumption that the comment only happens on right side*/}}
@@ -28,7 +28,7 @@
{{- end -}} {{- end -}}
</td> </td>
<td class="lines-num lines-num-new" data-line-num="{{if $line.RightIdx}}{{$line.RightIdx}}{{end}}"><span rel="{{if $line.RightIdx}}diff-{{$.FileNameHash}}R{{$line.RightIdx}}{{end}}"></span></td> <td class="lines-num lines-num-new" data-line-num="{{if $line.RightIdx}}{{$line.RightIdx}}{{end}}"><span rel="{{if $line.RightIdx}}diff-{{$.FileNameHash}}R{{$line.RightIdx}}{{end}}"></span></td>
<td class="lines-escape lines-escape-new">{{if and $line.RightIdx $inlineDiff.EscapeStatus.Escaped}}<button class="toggle-escape-button btn interact-bg" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff}}"></button>{{end}}</td> <td class="lines-escape lines-escape-new">{{if $line.RightIdx}}{{ctx.RenderUtils.RenderUnicodeEscapeToggleButton $inlineDiff.EscapeStatus}}{{end}}</td>
<td class="lines-type-marker lines-type-marker-new">{{if $line.RightIdx}}<span class="tw-font-mono" data-type-marker=""></span>{{end}}</td> <td class="lines-type-marker lines-type-marker-new">{{if $line.RightIdx}}<span class="tw-font-mono" data-type-marker=""></span>{{end}}</td>
<td class="lines-code lines-code-new"> <td class="lines-code lines-code-new">
{{- if and $canCreateComment $line.RightIdx -}} {{- if and $canCreateComment $line.RightIdx -}}
@@ -69,7 +69,7 @@
<td class="lines-num lines-num-new" data-line-num="{{if $line.RightIdx}}{{$line.RightIdx}}{{end}}"><span rel="{{if $line.RightIdx}}diff-{{$.FileNameHash}}R{{$line.RightIdx}}{{end}}"></span></td> <td class="lines-num lines-num-new" data-line-num="{{if $line.RightIdx}}{{$line.RightIdx}}{{end}}"><span rel="{{if $line.RightIdx}}diff-{{$.FileNameHash}}R{{$line.RightIdx}}{{end}}"></span></td>
{{end}} {{end}}
{{$inlineDiff := $.section.GetComputedInlineDiffFor $line ctx.Locale}} {{$inlineDiff := $.section.GetComputedInlineDiffFor $line ctx.Locale}}
<td class="lines-escape">{{if $inlineDiff.EscapeStatus.Escaped}}<button class="toggle-escape-button btn interact-bg" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff}}"></button>{{end}}</td> <td class="lines-escape">{{ctx.RenderUtils.RenderUnicodeEscapeToggleButton $inlineDiff.EscapeStatus}}</td>
<td class="lines-type-marker"><span class="tw-font-mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span></td> <td class="lines-type-marker"><span class="tw-font-mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span></td>
<td class="lines-code{{if (not $line.RightIdx)}} lines-code-old{{end}}"> <td class="lines-code{{if (not $line.RightIdx)}} lines-code-old{{end}}">
{{- if and $canCreateComment -}} {{- if and $canCreateComment -}}
@@ -77,7 +77,7 @@
{{- svg "octicon-plus" -}} {{- svg "octicon-plus" -}}
</button> </button>
{{- end -}} {{- end -}}
<code {{if $inlineDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff}}"{{else}}class="code-inner"{{end}}>{{$inlineDiff.Content}}</code> <code class="code-inner {{if $inlineDiff.EscapeStatus.Escaped}}has-escaped{{end}}">{{$inlineDiff.Content}}</code>
</td> </td>
</tr> </tr>
{{if $line.Comments}} {{if $line.Comments}}
-2
View File
@@ -1,2 +0,0 @@
{{if .diff.EscapeStatus.HasInvisible}}{{ctx.Locale.Tr "repo.invisible_runes_line"}} {{end -}}
{{- if .diff.EscapeStatus.HasAmbiguous}}{{ctx.Locale.Tr "repo.ambiguous_runes_line"}}{{end}}
+1 -1
View File
@@ -1 +1 @@
<code class="code-inner{{if .diff.EscapeStatus.Escaped}} has-escaped{{end}}"{{if .diff.EscapeStatus.Escaped}} title="{{template "repo/diff/escape_title" .}}"{{end}}>{{.diff.Content}}</code> <code class="code-inner{{if .diff.EscapeStatus.Escaped}} has-escaped{{end}}">{{.diff.Content}}</code>
+5 -5
View File
@@ -18,14 +18,14 @@
{{if eq .GetType 4}} {{if eq .GetType 4}}
{{$inlineDiff := $section.GetComputedInlineDiffFor $line ctx.Locale}} {{$inlineDiff := $section.GetComputedInlineDiffFor $line ctx.Locale}}
<td class="lines-num lines-num-old">{{$line.RenderBlobExcerptButtons $file.NameHash $diffBlobExcerptData}}</td> <td class="lines-num lines-num-old">{{$line.RenderBlobExcerptButtons $file.NameHash $diffBlobExcerptData}}</td>
<td class="lines-escape lines-escape-old">{{if $inlineDiff.EscapeStatus.Escaped}}<button class="toggle-escape-button btn interact-bg" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff}}"></button>{{end}}</td> <td class="lines-escape lines-escape-old">{{ctx.RenderUtils.RenderUnicodeEscapeToggleButton $inlineDiff.EscapeStatus}}</td>
<td colspan="6" class="lines-code lines-code-old">{{template "repo/diff/section_code" dict "diff" $inlineDiff}}</td> <td colspan="6" class="lines-code lines-code-old">{{template "repo/diff/section_code" dict "diff" $inlineDiff}}</td>
{{else if and (eq .GetType 3) $hasmatch}}{{/* DEL */}} {{else if and (eq .GetType 3) $hasmatch}}{{/* DEL */}}
{{$match := index $section.Lines $line.Match}} {{$match := index $section.Lines $line.Match}}
{{- $leftDiff := ""}}{{if $line.LeftIdx}}{{$leftDiff = $section.GetComputedInlineDiffFor $line ctx.Locale}}{{end}} {{- $leftDiff := ""}}{{if $line.LeftIdx}}{{$leftDiff = $section.GetComputedInlineDiffFor $line ctx.Locale}}{{end}}
{{- $rightDiff := ""}}{{if $match.RightIdx}}{{$rightDiff = $section.GetComputedInlineDiffFor $match ctx.Locale}}{{end}} {{- $rightDiff := ""}}{{if $match.RightIdx}}{{$rightDiff = $section.GetComputedInlineDiffFor $match ctx.Locale}}{{end}}
<td class="lines-num lines-num-old del-code" data-line-num="{{$line.LeftIdx}}"><span rel="diff-{{$file.NameHash}}L{{$line.LeftIdx}}"></span></td> <td class="lines-num lines-num-old del-code" data-line-num="{{$line.LeftIdx}}"><span rel="diff-{{$file.NameHash}}L{{$line.LeftIdx}}"></span></td>
<td class="lines-escape del-code lines-escape-old">{{if $line.LeftIdx}}{{if $leftDiff.EscapeStatus.Escaped}}<button class="toggle-escape-button btn interact-bg" title="{{template "repo/diff/escape_title" dict "diff" $leftDiff}}"></button>{{end}}{{end}}</td> <td class="lines-escape del-code lines-escape-old">{{if $line.LeftIdx}}{{ctx.RenderUtils.RenderUnicodeEscapeToggleButton $leftDiff.EscapeStatus}}{{end}}</td>
<td class="lines-type-marker lines-type-marker-old del-code"><span class="tw-font-mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span></td> <td class="lines-type-marker lines-type-marker-old del-code"><span class="tw-font-mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span></td>
<td class="lines-code lines-code-old del-code"> <td class="lines-code lines-code-old del-code">
{{- if and $.root.SignedUserID $.root.PageIsPullFiles -}} {{- if and $.root.SignedUserID $.root.PageIsPullFiles -}}
@@ -40,7 +40,7 @@
{{- end -}} {{- end -}}
</td> </td>
<td class="lines-num lines-num-new add-code" data-line-num="{{if $match.RightIdx}}{{$match.RightIdx}}{{end}}"><span rel="{{if $match.RightIdx}}diff-{{$file.NameHash}}R{{$match.RightIdx}}{{end}}"></span></td> <td class="lines-num lines-num-new add-code" data-line-num="{{if $match.RightIdx}}{{$match.RightIdx}}{{end}}"><span rel="{{if $match.RightIdx}}diff-{{$file.NameHash}}R{{$match.RightIdx}}{{end}}"></span></td>
<td class="lines-escape add-code lines-escape-new">{{if $match.RightIdx}}{{if $rightDiff.EscapeStatus.Escaped}}<button class="toggle-escape-button btn interact-bg" title="{{template "repo/diff/escape_title" dict "diff" $rightDiff}}"></button>{{end}}{{end}}</td> <td class="lines-escape add-code lines-escape-new">{{if $match.RightIdx}}{{ctx.RenderUtils.RenderUnicodeEscapeToggleButton $rightDiff.EscapeStatus}}{{end}}</td>
<td class="lines-type-marker lines-type-marker-new add-code">{{if $match.RightIdx}}<span class="tw-font-mono" data-type-marker="{{$match.GetLineTypeMarker}}"></span>{{end}}</td> <td class="lines-type-marker lines-type-marker-new add-code">{{if $match.RightIdx}}<span class="tw-font-mono" data-type-marker="{{$match.GetLineTypeMarker}}"></span>{{end}}</td>
<td class="lines-code lines-code-new add-code"> <td class="lines-code lines-code-new add-code">
{{- if and $.root.SignedUserID $.root.PageIsPullFiles -}} {{- if and $.root.SignedUserID $.root.PageIsPullFiles -}}
@@ -57,7 +57,7 @@
{{else}} {{else}}
{{$inlineDiff := $section.GetComputedInlineDiffFor $line ctx.Locale}} {{$inlineDiff := $section.GetComputedInlineDiffFor $line ctx.Locale}}
<td class="lines-num lines-num-old" data-line-num="{{if $line.LeftIdx}}{{$line.LeftIdx}}{{end}}"><span rel="{{if $line.LeftIdx}}diff-{{$file.NameHash}}L{{$line.LeftIdx}}{{end}}"></span></td> <td class="lines-num lines-num-old" data-line-num="{{if $line.LeftIdx}}{{$line.LeftIdx}}{{end}}"><span rel="{{if $line.LeftIdx}}diff-{{$file.NameHash}}L{{$line.LeftIdx}}{{end}}"></span></td>
<td class="lines-escape lines-escape-old">{{if $line.LeftIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}<button class="toggle-escape-button btn interact-bg" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff}}"></button>{{end}}{{end}}</td> <td class="lines-escape lines-escape-old">{{if $line.LeftIdx}}{{ctx.RenderUtils.RenderUnicodeEscapeToggleButton $inlineDiff.EscapeStatus}}{{end}}</td>
<td class="lines-type-marker lines-type-marker-old">{{if $line.LeftIdx}}<span class="tw-font-mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span>{{end}}</td> <td class="lines-type-marker lines-type-marker-old">{{if $line.LeftIdx}}<span class="tw-font-mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span>{{end}}</td>
<td class="lines-code lines-code-old"> <td class="lines-code lines-code-old">
{{- if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2)) -}} {{- if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2)) -}}
@@ -72,7 +72,7 @@
{{- end -}} {{- end -}}
</td> </td>
<td class="lines-num lines-num-new" data-line-num="{{if $line.RightIdx}}{{$line.RightIdx}}{{end}}"><span rel="{{if $line.RightIdx}}diff-{{$file.NameHash}}R{{$line.RightIdx}}{{end}}"></span></td> <td class="lines-num lines-num-new" data-line-num="{{if $line.RightIdx}}{{$line.RightIdx}}{{end}}"><span rel="{{if $line.RightIdx}}diff-{{$file.NameHash}}R{{$line.RightIdx}}{{end}}"></span></td>
<td class="lines-escape lines-escape-new">{{if $line.RightIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}<button class="toggle-escape-button btn interact-bg" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff}}"></button>{{end}}{{end}}</td> <td class="lines-escape lines-escape-new">{{if $line.RightIdx}}{{ctx.RenderUtils.RenderUnicodeEscapeToggleButton $inlineDiff.EscapeStatus}}{{end}}</td>
<td class="lines-type-marker lines-type-marker-new">{{if $line.RightIdx}}<span class="tw-font-mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span>{{end}}</td> <td class="lines-type-marker lines-type-marker-new">{{if $line.RightIdx}}<span class="tw-font-mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span>{{end}}</td>
<td class="lines-code lines-code-new"> <td class="lines-code lines-code-new">
{{- if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3)) -}} {{- if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3)) -}}
+1 -3
View File
@@ -24,9 +24,7 @@
{{end}} {{end}}
{{$inlineDiff := $section.GetComputedInlineDiffFor $line ctx.Locale -}} {{$inlineDiff := $section.GetComputedInlineDiffFor $line ctx.Locale -}}
<td class="lines-escape"> <td class="lines-escape">
{{- if $inlineDiff.EscapeStatus.Escaped -}} {{ctx.RenderUtils.RenderUnicodeEscapeToggleButton $inlineDiff.EscapeStatus}}
<button class="toggle-escape-button btn interact-bg" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff}}"></button>
{{- end -}}
</td> </td>
<td class="lines-type-marker"><span class="tw-font-mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span></td> <td class="lines-type-marker"><span class="tw-font-mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span></td>
{{if eq .GetType 4}} {{if eq .GetType 4}}
+1 -3
View File
@@ -109,9 +109,7 @@
{{$line := Eval $idx "+" 1}} {{$line := Eval $idx "+" 1}}
<tr> <tr>
<td class="lines-num"><span id="L{{$line}}" data-line-number="{{$line}}"></span></td> <td class="lines-num"><span id="L{{$line}}" data-line-number="{{$line}}"></span></td>
{{if $.EscapeStatus.Escaped}} {{ctx.RenderUtils.RenderUnicodeEscapeToggleTd $.EscapeStatus (index $.LineEscapeStatus $idx)}}
<td class="lines-escape">{{if (index $.LineEscapeStatus $idx).Escaped}}<button class="toggle-escape-button btn interact-bg" title="{{if (index $.LineEscapeStatus $idx).HasInvisible}}{{ctx.Locale.Tr "repo.invisible_runes_line"}} {{end}}{{if (index $.LineEscapeStatus $idx).HasAmbiguous}}{{ctx.Locale.Tr "repo.ambiguous_runes_line"}}{{end}}"></button>{{end}}</td>
{{end}}
<td rel="L{{$line}}" class="lines-code chroma"><code class="code-inner">{{$code}}</code></td> <td rel="L{{$line}}" class="lines-code chroma"><code class="code-inner">{{$code}}</code></td>
</tr> </tr>
{{end}} {{end}}
+2
View File
@@ -17,6 +17,7 @@
/* images */ /* images */
--checkbox-mask-checked: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" width="12" height="9" viewBox="0 0 12 9"><path fill-rule="evenodd" d="M11.78.22a.75.75 0 0 1 0 1.061L4.52 8.541a.75.75 0 0 1-1.062 0L.202 5.285a.75.75 0 0 1 1.061-1.061l2.725 2.723L10.718.22a.75.75 0 0 1 1.062 0"/></svg>'); --checkbox-mask-checked: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" width="12" height="9" viewBox="0 0 12 9"><path fill-rule="evenodd" d="M11.78.22a.75.75 0 0 1 0 1.061L4.52 8.541a.75.75 0 0 1-1.062 0L.202 5.285a.75.75 0 0 1 1.061-1.061l2.725 2.723L10.718.22a.75.75 0 0 1 1.062 0"/></svg>');
--checkbox-mask-indeterminate: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" width="10" height="2" viewBox="0 0 10 2"><path fill-rule="evenodd" d="M0 1a1 1 0 0 1 1-1h8a1 1 0 1 1 0 2H1a1 1 0 0 1-1-1" clip-rule="evenodd"/></svg>'); --checkbox-mask-indeterminate: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" width="10" height="2" viewBox="0 0 10 2"><path fill-rule="evenodd" d="M0 1a1 1 0 0 1 1-1h8a1 1 0 1 1 0 2H1a1 1 0 0 1-1-1" clip-rule="evenodd"/></svg>');
--octicon-alert-fill: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16"><path d="M6.457 1.047c.659-1.234 2.427-1.234 3.086 0l6.082 11.378A1.75 1.75 0 0 1 14.082 15H1.918a1.75 1.75 0 0 1-1.543-2.575ZM8 5a.75.75 0 0 0-.75.75v2.5a.75.75 0 0 0 1.5 0v-2.5A.75.75 0 0 0 8 5Zm1 6a1 1 0 1 0-2 0 1 1 0 0 0 2 0Z"/></svg>');
--octicon-chevron-right: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16"><path d="M6.22 3.22a.75.75 0 0 1 1.06 0l4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L9.94 8 6.22 4.28a.75.75 0 0 1 0-1.06Z"></path></svg>'); --octicon-chevron-right: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16"><path d="M6.22 3.22a.75.75 0 0 1 1.06 0l4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L9.94 8 6.22 4.28a.75.75 0 0 1 0-1.06Z"></path></svg>');
--octicon-x: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16"><path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.75.75 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.75.75 0 0 1-.734-.215L8 9.06l-3.22 3.22a.75.75 0 0 1-1.042-.018.75.75 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06"/></svg>'); --octicon-x: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16"><path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.75.75 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.75.75 0 0 1-.734-.215L8 9.06l-3.22 3.22a.75.75 0 0 1-1.042-.018.75.75 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06"/></svg>');
--select-arrows: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16"><path d="m4.074 9.427 3.396 3.396a.25.25 0 0 0 .354 0l3.396-3.396A.25.25 0 0 0 11.043 9H4.251a.25.25 0 0 0-.177.427m0-1.957L7.47 4.073a.25.25 0 0 1 .354 0L11.22 7.47a.25.25 0 0 1-.177.426H4.251a.25.25 0 0 1-.177-.426"/></svg>'); --select-arrows: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16"><path d="m4.074 9.427 3.396 3.396a.25.25 0 0 0 .354 0l3.396-3.396A.25.25 0 0 0 11.043 9H4.251a.25.25 0 0 0-.177.427m0-1.957L7.47 4.073a.25.25 0 0 1 .354 0L11.22 7.47a.25.25 0 0 1-.177.426H4.251a.25.25 0 0 1-.177-.426"/></svg>');
@@ -686,6 +687,7 @@ overflow-menu .ui.label {
} }
.lines-num, .lines-num,
.lines-escape,
.lines-code { .lines-code {
font-size: 12px; font-size: 12px;
font-family: var(--fonts-monospace); font-family: var(--fonts-monospace);
+15 -8
View File
@@ -1,24 +1,31 @@
/* /*
Show the escaped and hide the real char: Show the escaped and hide the real char:
<span class="broken-code-point" data-escaped="DEL"><span class="char">{real-char}</span></span> <span class="broken-code-point" data-escaped=""><span class="char">{real-char}</span></span>
Only show the real-char: Only show the real-char:
<span class="broken-code-point">{real-char}</span> <span class="broken-code-point">{real-char}</span>
*/ */
.broken-code-point:not([data-escaped]), .broken-code-point:not([data-escaped]) {
.broken-code-point[data-escaped]::before {
border-radius: 4px; border-radius: 4px;
padding: 0 2px; padding: 0 2px;
color: var(--color-body); border: 1px var(--color-yellow) solid;
background: var(--color-text-light-1); }
.broken-code-point[data-escaped] {
position: relative;
} }
.broken-code-point[data-escaped]::before { .broken-code-point[data-escaped]::before {
visibility: visible; visibility: visible;
content: attr(data-escaped); content: attr(data-escaped);
border-radius: 2px;
padding: 0 1px;
border: 1px var(--color-yellow) solid;
} }
.broken-code-point[data-escaped] .char { .broken-code-point[data-escaped] .char {
/* make it copyable by selecting the text (AI suggestion, no other solution) */ /* keep the original character selectable/copyable while showing the escaped label via ::before */
position: absolute; position: absolute;
left: 0;
opacity: 0; opacity: 0;
pointer-events: none; pointer-events: none;
} }
@@ -26,11 +33,11 @@ Only show the real-char:
/* /*
Show the escaped and hide the real-char: Show the escaped and hide the real-char:
<span class="unicode-escaped"> <span class="unicode-escaped">
<span class="escaped-code-point" data-escaped="U+1F600"><span class="char">{real-char}</span></span> <span class="escaped-code-point" data-escaped="[U+1F600]"><span class="char">{real-char}</span></span>
</span> </span>
Hide the escaped and show the real-char: Hide the escaped and show the real-char:
<span> <span>
<span class="escaped-code-point" data-escaped="U+1F600"><span class="char">{real-char}</span></span> <span class="escaped-code-point" data-escaped="[U+1F600]"><span class="char">{real-char}</span></span>
</span> </span>
*/ */
.unicode-escaped .escaped-code-point[data-escaped]::before { .unicode-escaped .escaped-code-point[data-escaped]::before {
+13 -4
View File
@@ -15,11 +15,20 @@
transform: scale(1.1); transform: scale(1.1);
} }
.lines-escape .toggle-escape-button {
margin: -1px 2px 0;
}
.lines-escape .toggle-escape-button::before { .lines-escape .toggle-escape-button::before {
visibility: visible; content: "";
content: "⚠️"; display: inline-flex;
font-family: var(--fonts-emoji); width: 14px;
color: var(--color-red); height: 14px;
background-color: var(--color-yellow); /* TODO: maybe it needs a new kind of color, there is no suitable "warning" color in the current palette */
mask-image: var(--octicon-alert-fill);
-webkit-mask-image: var(--octicon-alert-fill);
mask-size: contain;
-webkit-mask-size: contain;
} }
.repository .diff-file-box .code-diff td.lines-escape { .repository .diff-file-box .code-diff td.lines-escape {