Files
codeql-action/node_modules/github-linguist/src/languages.ts
Andrew Eisenberg b29bf7b05a Upgrade linguist dependency
This version changes how it counts python heredoc. All heredoc is
counted as code.
2021-08-25 11:15:45 -07:00

217 lines
5.5 KiB
TypeScript

import languageMap from 'language-map/languages.json';
import { ExtensionJustify } from './utils';
interface ExtensionsTypes {
[key: string]: string;
}
/**
* The extension map can contain multiple languages with the same extension,
* but we only want a single one. For the moment, these clashes are resolved
* by the simple heuristic below listing high-priority languages. We may want
* to consider smarter heuristics to correctly identify languages in cases
* where the extension is ambiguous. The ordering of the list matters and
* languages earlier on will get a higher priority when resolving clashes.
*/
const importantLanguages = [
'javascript',
'typescript',
'ruby',
'python',
'java',
'c',
'c++',
'c#',
'rust',
'scala',
'perl',
'go',
];
export interface Regexes {
singleLineComment: RegExp;
multiLineCommentOpen?: RegExp;
multiLineCommentOpenStart?: RegExp;
multiLineCommentClose?: RegExp;
multiLineCommentCloseEnd?: RegExp;
multiLineCommentOpenAndClose?: RegExp;
}
const ALL_REGEXES: Record<string, Regexes> = {
c: {
// matches when // are the first two characters of a line
singleLineComment: /^\/\//,
// matches when /* exists in a line
multiLineCommentOpen: /\/\*/,
// matches when /* starts a line
multiLineCommentOpenStart: /^\/\*/,
// matches when */ exists a line
multiLineCommentClose: /\*\//,
// matches when */ ends a line
multiLineCommentCloseEnd: /\*\/$/,
// matches /* ... */
multiLineCommentOpenAndClose: /\/\*.*\*\//,
},
python: {
// matches when # the first character of a line
singleLineComment: /^#/,
// matches when """ starts a line. This is not right, since
// a multiline string is not always a comment, but for the
// sake of simplicity, we will do that here.
// multiLineCommentOpen: /"""/,
// matches when """ starts a line
// multiLineCommentOpenStart: /^"""/,
// matches when """ exists in a line
// multiLineCommentClose: /"""/,
// matches when """ ends a line
// multiLineCommentCloseEnd: /"""$/,
// matches """ ... """
// multiLineCommentOpenAndClose: /""".*"""/,
},
ruby: {
// matches when # the first character of a line
singleLineComment: /^#/,
// For ruby multiline comments, =begin and =end must be
// on their own lines
// matches when =begin starts a line
multiLineCommentOpen: /^=begin/,
// matches when "begin starts a line
multiLineCommentOpenStart: /^=begin/,
// matches when "end ends a line
multiLineCommentClose: /^=end/,
// matches when "end ends a line
multiLineCommentCloseEnd: /^=end$/,
// not possible in ruby
multiLineCommentOpenAndClose: /^\0$/,
},
html: {
// There is no single line comment
singleLineComment: /^\0$/,
// matches when =begin starts a line
multiLineCommentOpen: /<!--/,
// matches when "begin starts a line
multiLineCommentOpenStart: /^<!--/,
// matches when "end ends a line
multiLineCommentClose: /-->/,
// matches when "end ends a line
multiLineCommentCloseEnd: /-->$/,
// matches <!-- ... -->
multiLineCommentOpenAndClose: /<!--.*-->/,
},
};
/**
* detecte program language through file extension
*
* @export
* @class LanguageDetector
*/
export class Languages {
extensionMap: {
[key: string]: string;
} = {};
/**
* Creates an instance of Detector.
*/
constructor() {
this.extensionMap = this.loadExtensionMap();
}
/**
* load language before detecting
*/
private loadExtensionMap = () => {
const extensions: ExtensionsTypes = {};
Object.keys(languageMap).forEach((language) => {
const languageMode = languageMap[language];
const languageExtensions = (languageMode && languageMode.extensions) || [];
languageExtensions.forEach((extension: string) => {
const lowerCaseExtension = extension.toLowerCase();
const lowerCaseLanguage = language.toLowerCase();
if (!extensions[lowerCaseExtension]) {
extensions[lowerCaseExtension] = lowerCaseLanguage;
} else {
const currentLanguagePriority = importantLanguages.indexOf(
extensions[lowerCaseExtension],
);
if (currentLanguagePriority === -1) {
extensions[lowerCaseExtension] = lowerCaseLanguage;
} else {
const otherPriority = importantLanguages.indexOf(lowerCaseLanguage);
if (otherPriority !== -1 && otherPriority < currentLanguagePriority)
extensions[lowerCaseExtension] = lowerCaseLanguage;
}
}
});
});
return { ...extensions, ...ExtensionJustify };
};
/**
* Retrieve the regular expressions for a given language.
* This is incomplete, but covers most of the languages we
* see in the wild.
*
* @param language the language to retrieve regexes for
*/
public getRegexes(language: string): Regexes {
switch (language) {
case 'html':
case 'xml':
return ALL_REGEXES.html;
case 'ruby':
return ALL_REGEXES.ruby;
case 'python':
return ALL_REGEXES.python;
default:
// not exact, but likely the best guess for any other unspecified language.
return ALL_REGEXES.c;
}
}
/**
* return extension map
*/
public getExtensionMap() {
return this.extensionMap;
}
/**
* get file type through a path
*/
public getType(path: string): string {
const fileExtension = `.${path.split('.').pop()}`;
return this.extensionMap[fileExtension] || '';
}
}