mirror of
https://github.com/github/codeql-action.git
synced 2025-12-29 10:40:17 +08:00
217 lines
5.5 KiB
TypeScript
217 lines
5.5 KiB
TypeScript
import languageMap from 'language-map/languages.json';
|
|
import { ExtensionJustify } from './utils';
|
|
|
|
/**
 * Dictionary used while building the extension table: each key is a file
 * extension and each value is the name of the language chosen for it.
 */
interface ExtensionsTypes {
  [extension: string]: string;
}
|
|
|
|
/**
 * High-priority languages used to break ties between languages that share a
 * file extension.
 *
 * The extension map can list several languages for one extension, but only a
 * single language may be kept. Clashes are currently settled with this simple
 * priority list; a smarter heuristic may be worth considering for extensions
 * that are genuinely ambiguous.
 *
 * Order matters: the earlier a language appears, the higher its priority.
 * Names are lower-case because they are compared against lower-cased
 * language names.
 */
const importantLanguages = [
  // scripting / web
  'javascript', 'typescript', 'ruby', 'python',
  // JVM and C family
  'java', 'c', 'c++', 'c#',
  // everything else
  'rust', 'scala', 'perl', 'go',
];
|
|
|
|
/**
 * Set of regular expressions describing the comment syntax of one language.
 *
 * Only `singleLineComment` is mandatory; languages for which multi-line
 * comments are not recognised simply omit the remaining members.
 */
export interface Regexes {
  /** Pattern for a single-line comment. */
  singleLineComment: RegExp;

  /** Pattern for a multi-line comment opener appearing in a line. */
  multiLineCommentOpen?: RegExp;

  /** Pattern for a multi-line comment opener at the start of a line. */
  multiLineCommentOpenStart?: RegExp;

  /** Pattern for a multi-line comment closer appearing in a line. */
  multiLineCommentClose?: RegExp;

  /** Pattern for a multi-line comment closer at the end of a line. */
  multiLineCommentCloseEnd?: RegExp;

  /** Pattern for a multi-line comment that opens and closes on one line. */
  multiLineCommentOpenAndClose?: RegExp;
}
|
|
|
|
/**
 * Comment-matching regular expressions, keyed by language family.
 *
 * The pattern `/^\0$/` is used wherever a language has no such construct: it
 * only matches a line made of a single NUL character, so in practice it never
 * matches.
 */
const ALL_REGEXES: Record<string, Regexes> = {
  // C-style comments: `//` and `/* ... */`.
  c: {
    // `//` as the first two characters of the line
    singleLineComment: /^\/\//,

    // `/*` anywhere in the line
    multiLineCommentOpen: /\/\*/,

    // `/*` at the start of the line
    multiLineCommentOpenStart: /^\/\*/,

    // `*/` anywhere in the line
    multiLineCommentClose: /\*\//,

    // `*/` at the end of the line
    multiLineCommentCloseEnd: /\*\/$/,

    // `/* ... */` opened and closed on the same line
    multiLineCommentOpenAndClose: /\/\*.*\*\//,
  },

  // Python: only `#` comments are recognised.
  python: {
    // `#` as the first character of the line
    singleLineComment: /^#/,

    // Triple-quoted strings are not treated as comments: the entries below
    // are intentionally left disabled. Enabling them would not be strictly
    // right, since a multi-line string is not always a comment.
    //
    // `"""` anywhere in the line
    // multiLineCommentOpen: /"""/,
    //
    // `"""` at the start of the line
    // multiLineCommentOpenStart: /^"""/,
    //
    // `"""` anywhere in the line
    // multiLineCommentClose: /"""/,
    //
    // `"""` at the end of the line
    // multiLineCommentCloseEnd: /"""$/,
    //
    // `""" ... """` opened and closed on the same line
    // multiLineCommentOpenAndClose: /""".*"""/,
  },

  // Ruby: `#` comments plus `=begin` / `=end` blocks. The `=begin` and
  // `=end` markers must sit on their own lines.
  ruby: {
    // `#` as the first character of the line
    singleLineComment: /^#/,

    // `=begin` at the start of the line
    multiLineCommentOpen: /^=begin/,

    // `=begin` at the start of the line
    multiLineCommentOpenStart: /^=begin/,

    // `=end` at the start of the line
    multiLineCommentClose: /^=end/,

    // a line that is exactly `=end`
    multiLineCommentCloseEnd: /^=end$/,

    // a block cannot open and close on one line in ruby
    multiLineCommentOpenAndClose: /^\0$/,
  },

  // HTML-style comments: `<!-- ... -->`.
  html: {
    // there is no single-line comment syntax
    singleLineComment: /^\0$/,

    // `<!--` anywhere in the line
    multiLineCommentOpen: /<!--/,

    // `<!--` at the start of the line
    multiLineCommentOpenStart: /^<!--/,

    // `-->` anywhere in the line
    multiLineCommentClose: /-->/,

    // `-->` at the end of the line
    multiLineCommentCloseEnd: /-->$/,

    // `<!-- ... -->` opened and closed on the same line
    multiLineCommentOpenAndClose: /<!--.*-->/,
  },
};
|
|
|
|
/**
 * Detects the programming language of a file from its file extension.
 *
 * The extension table is built once, when an instance is constructed, from
 * the entries of `languageMap` and is then overlaid with the entries of
 * `ExtensionJustify`.
 *
 * @export
 * @class Languages
 */
export class Languages {
  /** Maps a file extension (including the leading dot) to a language name. */
  extensionMap: {
    [key: string]: string;
  } = {};

  /**
   * Creates an instance of Languages and populates its extension table.
   */
  constructor() {
    this.extensionMap = this.loadExtensionMap();
  }

  /**
   * Builds the extension table used for detection.
   *
   * Extensions and language names taken from `languageMap` are lower-cased.
   * When several languages declare the same extension the clash is resolved
   * as follows:
   *  - the first language seen claims the extension;
   *  - a holder that is not listed in `importantLanguages` is replaced by
   *    any later language;
   *  - a holder that is listed is replaced only by a language that appears
   *    earlier in `importantLanguages`.
   *
   * Entries of `ExtensionJustify` are spread last, so they override whatever
   * was derived from `languageMap`.
   */
  private loadExtensionMap = () => {
    const extensions: ExtensionsTypes = {};

    Object.keys(languageMap).forEach((language) => {
      const languageMode = languageMap[language];
      const languageExtensions = (languageMode && languageMode.extensions) || [];
      // The language name does not change per extension, so lower-case it once.
      const lowerCaseLanguage = language.toLowerCase();

      languageExtensions.forEach((extension: string) => {
        const lowerCaseExtension = extension.toLowerCase();
        const currentLanguage = extensions[lowerCaseExtension];

        // Nobody owns this extension yet.
        if (!currentLanguage) {
          extensions[lowerCaseExtension] = lowerCaseLanguage;
          return;
        }

        const currentLanguagePriority = importantLanguages.indexOf(currentLanguage);

        // The current owner has no priority: the newcomer takes over.
        if (currentLanguagePriority === -1) {
          extensions[lowerCaseExtension] = lowerCaseLanguage;
          return;
        }

        // Both may be prioritised: a smaller index means a higher priority.
        const otherPriority = importantLanguages.indexOf(lowerCaseLanguage);
        if (otherPriority !== -1 && otherPriority < currentLanguagePriority) {
          extensions[lowerCaseExtension] = lowerCaseLanguage;
        }
      });
    });

    return { ...extensions, ...ExtensionJustify };
  };

  /**
   * Retrieve the regular expressions for a given language.
   * This is incomplete, but covers most of the languages we
   * see in the wild.
   *
   * @param language the language to retrieve regexes for
   * @returns the html set for `html`/`xml`, the ruby set for `ruby`, the
   *   python set for `python`, and the C-style set for anything else
   */
  public getRegexes(language: string): Regexes {
    switch (language) {
      case 'html':
      case 'xml':
        return ALL_REGEXES.html;

      case 'ruby':
        return ALL_REGEXES.ruby;

      case 'python':
        return ALL_REGEXES.python;

      default:
        // not exact, but likely the best guess for any other unspecified language.
        return ALL_REGEXES.c;
    }
  }

  /**
   * Returns the extension table held by this instance (the same object, not
   * a copy).
   */
  public getExtensionMap() {
    return this.extensionMap;
  }

  /**
   * Determines the language of a file from its path.
   *
   * The extension is everything after the last `.` in `path`. It is looked up
   * exactly as written first; if that fails, its lower-cased form is tried,
   * because the table built from `languageMap` only has lower-case keys and
   * a path such as `INDEX.JS` would otherwise never match.
   *
   * @param path the file path (or bare file name) to classify
   * @returns the language name, or an empty string when the extension is
   *   unknown
   */
  public getType(path: string): string {
    const fileExtension = `.${path.split('.').pop()}`;
    return (
      this.extensionMap[fileExtension] ||
      this.extensionMap[fileExtension.toLowerCase()] ||
      ''
    );
  }
}
|