mirror of
https://github.com/mastodon/mastodon.git
synced 2024-12-16 15:16:38 +00:00
Updates to bio metadata script
This commit is contained in:
parent
f5e2469485
commit
21bafc6555
|
@ -69,6 +69,10 @@ functions are:
|
|||
easier to read and to maintain. I leave it to the future readers of
|
||||
this code to determine the extent of my successes in this endeavor.
|
||||
|
||||
UPDATE 19 Oct 2017: We no longer allow character escapes inside our
|
||||
double-quoted strings for ease of processing. We now internally use
|
||||
the name "ƔAML" in our code to clarify that this is Not Quite YAML.
|
||||
|
||||
Sending love + warmth eternal,
|
||||
- kibigo [@kibi@glitch.social]
|
||||
|
||||
|
@ -96,10 +100,7 @@ const ALLOWED_CHAR = unirex( // `c-printable` in the YAML 1.2 spec.
|
|||
compat_mode ? '[\t\n\r\x20-\x7e\x85\xa0-\ufffd]' : '[\t\n\r\x20-\x7e\x85\xa0-\ud7ff\ue000-\ufffd\u{10000}-\u{10FFFF}]'
|
||||
);
|
||||
const WHITE_SPACE = /[ \t]/;
|
||||
const INDENTATION = / */; // Indentation must be only spaces.
|
||||
const LINE_BREAK = /\r?\n|\r|<br\s*\/?>/;
|
||||
const ESCAPE_CHAR = /[0abt\tnvfre "\/\\N_LP]/;
|
||||
const HEXADECIMAL_CHARS = /[0-9a-fA-F]/;
|
||||
const INDICATOR = /[-?:,[\]{}&#*!|>'"%@`]/;
|
||||
const FLOW_CHAR = /[,[\]{}]/;
|
||||
|
||||
|
@ -121,7 +122,7 @@ const NEW_LINE = unirex(
|
|||
rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK)
|
||||
);
|
||||
const SOME_NEW_LINES = unirex(
|
||||
'(?:' + rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK) + ')+'
|
||||
'(?:' + rexstr(NEW_LINE) + ')+'
|
||||
);
|
||||
const POSSIBLE_STARTS = unirex(
|
||||
rexstr(DOCUMENT_START) + rexstr(/<p[^<>]*>/) + '?'
|
||||
|
@ -131,22 +132,13 @@ const POSSIBLE_ENDS = unirex(
|
|||
rexstr(DOCUMENT_END) + '|' +
|
||||
rexstr(/<\/p>/)
|
||||
);
|
||||
const CHARACTER_ESCAPE = unirex(
|
||||
rexstr(/\\/) +
|
||||
'(?:' +
|
||||
rexstr(ESCAPE_CHAR) + '|' +
|
||||
rexstr(/x/) + rexstr(HEXADECIMAL_CHARS) + '{2}' + '|' +
|
||||
rexstr(/u/) + rexstr(HEXADECIMAL_CHARS) + '{4}' + '|' +
|
||||
rexstr(/U/) + rexstr(HEXADECIMAL_CHARS) + '{8}' +
|
||||
')'
|
||||
const QUOTE_CHAR = unirex(
|
||||
'(?=' + rexstr(NOT_LINE_BREAK) + ')[^"]'
|
||||
);
|
||||
const ESCAPED_CHAR = unirex(
|
||||
rexstr(/(?!["\\])/) + rexstr(NOT_LINE_BREAK) + '|' +
|
||||
rexstr(CHARACTER_ESCAPE)
|
||||
);
|
||||
const ANY_ESCAPED_CHARS = unirex(
|
||||
rexstr(ESCAPED_CHAR) + '*'
|
||||
const ANY_QUOTE_CHAR = unirex(
|
||||
rexstr(QUOTE_CHAR) + '*'
|
||||
);
|
||||
|
||||
const ESCAPED_APOS = unirex(
|
||||
'(?=' + rexstr(NOT_LINE_BREAK) + ')' + rexstr(/[^']|''/)
|
||||
);
|
||||
|
@ -190,120 +182,76 @@ const LATER_VALUE_CHAR = unirex(
|
|||
|
||||
/* YAML CONSTRUCTS */
|
||||
|
||||
const YAML_START = unirex(
|
||||
rexstr(ANY_WHITE_SPACE) + rexstr(/---/)
|
||||
const ƔAML_START = unirex(
|
||||
rexstr(ANY_WHITE_SPACE) + '---'
|
||||
);
|
||||
const YAML_END = unirex(
|
||||
rexstr(ANY_WHITE_SPACE) + rexstr(/(?:---|\.\.\.)/)
|
||||
const ƔAML_END = unirex(
|
||||
rexstr(ANY_WHITE_SPACE) + '(?:---|\.\.\.)'
|
||||
);
|
||||
const YAML_LOOKAHEAD = unirex(
|
||||
const ƔAML_LOOKAHEAD = unirex(
|
||||
'(?=' +
|
||||
rexstr(YAML_START) +
|
||||
rexstr(ƔAML_START) +
|
||||
rexstr(ANY_ALLOWED_CHARS) + rexstr(NEW_LINE) +
|
||||
rexstr(YAML_END) + rexstr(POSSIBLE_ENDS) +
|
||||
rexstr(ƔAML_END) + rexstr(POSSIBLE_ENDS) +
|
||||
')'
|
||||
);
|
||||
const YAML_DOUBLE_QUOTE = unirex(
|
||||
rexstr(/"/) + rexstr(ANY_ESCAPED_CHARS) + rexstr(/"/)
|
||||
const ƔAML_DOUBLE_QUOTE = unirex(
|
||||
'"' + rexstr(ANY_QUOTE_CHAR) + '"'
|
||||
);
|
||||
const YAML_SINGLE_QUOTE = unirex(
|
||||
rexstr(/'/) + rexstr(ANY_ESCAPED_APOS) + rexstr(/'/)
|
||||
const ƔAML_SINGLE_QUOTE = unirex(
|
||||
'\'' + rexstr(ANY_ESCAPED_APOS) + '\''
|
||||
);
|
||||
const YAML_SIMPLE_KEY = unirex(
|
||||
const ƔAML_SIMPLE_KEY = unirex(
|
||||
rexstr(FIRST_KEY_CHAR) + rexstr(LATER_KEY_CHAR) + '*'
|
||||
);
|
||||
const YAML_SIMPLE_VALUE = unirex(
|
||||
const ƔAML_SIMPLE_VALUE = unirex(
|
||||
rexstr(FIRST_VALUE_CHAR) + rexstr(LATER_VALUE_CHAR) + '*'
|
||||
);
|
||||
const YAML_KEY = unirex(
|
||||
rexstr(YAML_DOUBLE_QUOTE) + '|' +
|
||||
rexstr(YAML_SINGLE_QUOTE) + '|' +
|
||||
rexstr(YAML_SIMPLE_KEY)
|
||||
const ƔAML_KEY = unirex(
|
||||
rexstr(ƔAML_DOUBLE_QUOTE) + '|' +
|
||||
rexstr(ƔAML_SINGLE_QUOTE) + '|' +
|
||||
rexstr(ƔAML_SIMPLE_KEY)
|
||||
);
|
||||
const YAML_VALUE = unirex(
|
||||
rexstr(YAML_DOUBLE_QUOTE) + '|' +
|
||||
rexstr(YAML_SINGLE_QUOTE) + '|' +
|
||||
rexstr(YAML_SIMPLE_VALUE)
|
||||
const ƔAML_VALUE = unirex(
|
||||
rexstr(ƔAML_DOUBLE_QUOTE) + '|' +
|
||||
rexstr(ƔAML_SINGLE_QUOTE) + '|' +
|
||||
rexstr(ƔAML_SIMPLE_VALUE)
|
||||
);
|
||||
const YAML_SEPARATOR = unirex(
|
||||
const ƔAML_SEPARATOR = unirex(
|
||||
rexstr(ANY_WHITE_SPACE) +
|
||||
':' + rexstr(WHITE_SPACE) +
|
||||
rexstr(ANY_WHITE_SPACE)
|
||||
);
|
||||
const YAML_LINE = unirex(
|
||||
'(' + rexstr(YAML_KEY) + ')' +
|
||||
rexstr(YAML_SEPARATOR) +
|
||||
'(' + rexstr(YAML_VALUE) + ')'
|
||||
const ƔAML_LINE = unirex(
|
||||
'(' + rexstr(ƔAML_KEY) + ')' +
|
||||
rexstr(ƔAML_SEPARATOR) +
|
||||
'(' + rexstr(ƔAML_VALUE) + ')'
|
||||
);
|
||||
|
||||
/* FRONTMATTER REGEX */
|
||||
|
||||
const YAML_FRONTMATTER = unirex(
|
||||
const ƔAML_FRONTMATTER = unirex(
|
||||
rexstr(POSSIBLE_STARTS) +
|
||||
rexstr(YAML_LOOKAHEAD) +
|
||||
rexstr(YAML_START) + rexstr(SOME_NEW_LINES) +
|
||||
rexstr(ƔAML_LOOKAHEAD) +
|
||||
rexstr(ƔAML_START) + rexstr(SOME_NEW_LINES) +
|
||||
'(?:' +
|
||||
'(' + rexstr(INDENTATION) + ')' +
|
||||
rexstr(YAML_LINE) + rexstr(SOME_NEW_LINES) +
|
||||
'(?:' +
|
||||
'\\1' + rexstr(YAML_LINE) + rexstr(SOME_NEW_LINES) +
|
||||
'){0,4}' +
|
||||
')?' +
|
||||
rexstr(YAML_END) + rexstr(POSSIBLE_ENDS)
|
||||
rexstr(ANY_WHITE_SPACE) + rexstr(ƔAML_LINE) + rexstr(SOME_NEW_LINES) +
|
||||
'){0,5}' +
|
||||
rexstr(ƔAML_END) + rexstr(POSSIBLE_ENDS)
|
||||
);
|
||||
|
||||
/* SEARCHES */
|
||||
|
||||
const FIND_YAML_LINES = unirex(
|
||||
rexstr(NEW_LINE) + rexstr(INDENTATION) + rexstr(YAML_LINE)
|
||||
const FIND_ƔAML_LINE = unirex(
|
||||
rexstr(NEW_LINE) + rexstr(ANY_WHITE_SPACE) + rexstr(ƔAML_LINE)
|
||||
);
|
||||
|
||||
/* STRING PROCESSING */
|
||||
|
||||
function processString(str) {
|
||||
function processString (str) {
|
||||
switch (str.charAt(0)) {
|
||||
case '"':
|
||||
return str
|
||||
.substring(1, str.length - 1)
|
||||
.replace(/\\0/g, '\x00')
|
||||
.replace(/\\a/g, '\x07')
|
||||
.replace(/\\b/g, '\x08')
|
||||
.replace(/\\t/g, '\x09')
|
||||
.replace(/\\\x09/g, '\x09')
|
||||
.replace(/\\n/g, '\x0a')
|
||||
.replace(/\\v/g, '\x0b')
|
||||
.replace(/\\f/g, '\x0c')
|
||||
.replace(/\\r/g, '\x0d')
|
||||
.replace(/\\e/g, '\x1b')
|
||||
.replace(/\\ /g, '\x20')
|
||||
.replace(/\\"/g, '\x22')
|
||||
.replace(/\\\//g, '\x2f')
|
||||
.replace(/\\\\/g, '\x5c')
|
||||
.replace(/\\N/g, '\x85')
|
||||
.replace(/\\_/g, '\xa0')
|
||||
.replace(/\\L/g, '\u2028')
|
||||
.replace(/\\P/g, '\u2029')
|
||||
.replace(
|
||||
new RegExp(
|
||||
unirex(
|
||||
rexstr(/\\x/) + '(' + rexstr(HEXADECIMAL_CHARS) + '{2})'
|
||||
), 'gu'
|
||||
), (_, n) => String.fromCodePoint('0x' + n)
|
||||
)
|
||||
.replace(
|
||||
new RegExp(
|
||||
unirex(
|
||||
rexstr(/\\u/) + '(' + rexstr(HEXADECIMAL_CHARS) + '{4})'
|
||||
), 'gu'
|
||||
), (_, n) => String.fromCodePoint('0x' + n)
|
||||
)
|
||||
.replace(
|
||||
new RegExp(
|
||||
unirex(
|
||||
rexstr(/\\U/) + '(' + rexstr(HEXADECIMAL_CHARS) + '{8})'
|
||||
), 'gu'
|
||||
), (_, n) => String.fromCodePoint('0x' + n)
|
||||
);
|
||||
return str.substring(1, str.length - 1);
|
||||
case '\'':
|
||||
return str
|
||||
.substring(1, str.length - 1)
|
||||
|
@ -321,15 +269,18 @@ export function processBio(content) {
|
|||
text: content,
|
||||
metadata: [],
|
||||
};
|
||||
let yaml = content.match(YAML_FRONTMATTER);
|
||||
if (!yaml) return result;
|
||||
else yaml = yaml[0];
|
||||
let start = content.search(YAML_START);
|
||||
let end = start + yaml.length - yaml.search(YAML_START);
|
||||
result.text = content.substr(0, start) + content.substr(end);
|
||||
let ɣaml = content.match(ƔAML_FRONTMATTER);
|
||||
if (!ɣaml) {
|
||||
return result;
|
||||
} else {
|
||||
ɣaml = ɣaml[0];
|
||||
}
|
||||
const start = content.search(ƔAML_START);
|
||||
const end = start + ɣaml.length - ɣaml.search(ƔAML_START);
|
||||
result.text = content.substr(end);
|
||||
let metadata = null;
|
||||
let query = new RegExp(FIND_YAML_LINES, 'g');
|
||||
while ((metadata = query.exec(yaml))) {
|
||||
let query = new RegExp(rexstr(FIND_ƔAML_LINE), 'g'); // Some browsers don't allow flags unless both args are strings
|
||||
while ((metadata = query.exec(ɣaml))) {
|
||||
result.metadata.push([
|
||||
processString(metadata[1]),
|
||||
processString(metadata[2]),
|
||||
|
@ -352,63 +303,23 @@ export function createBio(note, data) {
|
|||
let val = '' + data[i][1];
|
||||
|
||||
// Key processing
|
||||
if (key === (key.match(YAML_SIMPLE_KEY) || [])[0]) /* do nothing */;
|
||||
else if (key.indexOf('\'') === -1 && key === (key.match(ANY_ESCAPED_APOS) || [])[0]) key = '\'' + key + '\'';
|
||||
if (key === (key.match(ƔAML_SIMPLE_KEY) || [])[0]) /* do nothing */;
|
||||
else if (key === (key.match(ANY_QUOTE_CHAR) || [])[0]) key = '"' + key + '"';
|
||||
else {
|
||||
key = key
|
||||
.replace(/\x00/g, '\\0')
|
||||
.replace(/\x07/g, '\\a')
|
||||
.replace(/\x08/g, '\\b')
|
||||
.replace(/\x0a/g, '\\n')
|
||||
.replace(/\x0b/g, '\\v')
|
||||
.replace(/\x0c/g, '\\f')
|
||||
.replace(/\x0d/g, '\\r')
|
||||
.replace(/\x1b/g, '\\e')
|
||||
.replace(/\x22/g, '\\"')
|
||||
.replace(/\x5c/g, '\\\\');
|
||||
let badchars = key.match(
|
||||
new RegExp(rexstr(NOT_ALLOWED_CHAR), 'gu')
|
||||
) || [];
|
||||
for (let j = 0; j < badchars.length; j++) {
|
||||
key = key.replace(
|
||||
badchars[i],
|
||||
'\\u' + badchars[i].codePointAt(0).toLocaleString('en', {
|
||||
useGrouping: false,
|
||||
minimumIntegerDigits: 4,
|
||||
})
|
||||
);
|
||||
}
|
||||
key = '"' + key + '"';
|
||||
.replace(/'/g, '\'\'')
|
||||
.replace(new RegExp(rexstr(NOT_ALLOWED_CHAR), compat_mode ? 'g' : 'gu'), '<27>');
|
||||
key = '\'' + key + '\'';
|
||||
}
|
||||
|
||||
// Value processing
|
||||
if (val === (val.match(YAML_SIMPLE_VALUE) || [])[0]) /* do nothing */;
|
||||
else if (val.indexOf('\'') === -1 && val === (val.match(ANY_ESCAPED_APOS) || [])[0]) val = '\'' + val + '\'';
|
||||
if (val === (val.match(ƔAML_SIMPLE_VALUE) || [])[0]) /* do nothing */;
|
||||
else if (val === (val.match(ANY_QUOTE_CHAR) || [])[0]) val = '"' + val + '"';
|
||||
else {
|
||||
val = val
|
||||
.replace(/\x00/g, '\\0')
|
||||
.replace(/\x07/g, '\\a')
|
||||
.replace(/\x08/g, '\\b')
|
||||
.replace(/\x0a/g, '\\n')
|
||||
.replace(/\x0b/g, '\\v')
|
||||
.replace(/\x0c/g, '\\f')
|
||||
.replace(/\x0d/g, '\\r')
|
||||
.replace(/\x1b/g, '\\e')
|
||||
.replace(/\x22/g, '\\"')
|
||||
.replace(/\x5c/g, '\\\\');
|
||||
let badchars = val.match(
|
||||
new RegExp(rexstr(NOT_ALLOWED_CHAR), 'gu')
|
||||
) || [];
|
||||
for (let j = 0; j < badchars.length; j++) {
|
||||
val = val.replace(
|
||||
badchars[i],
|
||||
'\\u' + badchars[i].codePointAt(0).toLocaleString('en', {
|
||||
useGrouping: false,
|
||||
minimumIntegerDigits: 4,
|
||||
})
|
||||
);
|
||||
}
|
||||
val = '"' + val + '"';
|
||||
key = key
|
||||
.replace(/'/g, '\'\'')
|
||||
.replace(new RegExp(rexstr(NOT_ALLOWED_CHAR), compat_mode ? 'g' : 'gu'), '<27>');
|
||||
key = '\'' + key + '\'';
|
||||
}
|
||||
|
||||
frontmatter += key + ': ' + val + '\n';
|
||||
|
|
Loading…
Reference in a new issue