Upgrade linguist dependency

This version changes how Python heredocs (triple-quoted strings) are
counted: all heredoc lines are now counted as code.
This commit is contained in:
Andrew Eisenberg
2021-08-25 10:45:44 -07:00
parent a44b61d961
commit b29bf7b05a
32 changed files with 410 additions and 339 deletions

View File

@@ -13,7 +13,7 @@ program
.description('count lines of code in a file')
.action(async (pathPattern) => {
try {
const info = await (new LocFile(pathPattern).getFileInfo());
const info = await new LocFile(pathPattern).getFileInfo();
// eslint-disable-next-line no-console
console.log(
chalk.cyan(`
@@ -52,9 +52,9 @@ const formatInfo = (
program.arguments('<cmd> [env]').action(async (cmd) => {
try {
const { info, languages } = await (new LocDir({
include: cmd
}).loadInfo());
const { info, languages } = await new LocDir({
include: cmd,
}).loadInfo();
// eslint-disable-next-line no-console
console.log(chalk.cyan(formatInfo(info, languages)));
} catch (e) {

2
node_modules/github-linguist/src/declarations.d.ts generated vendored Normal file
View File

@@ -0,0 +1,2 @@
declare module 'slash2';
declare module 'language-map/languages.json';

View File

@@ -1,7 +1,6 @@
import globby from 'globby';
import fs from 'fs-extra';
import path from 'path';
// @ts-ignore
import slash from 'slash2';
import { LineInfo, LocFile } from './file';
@@ -41,18 +40,18 @@ const defaultExclude = [
// java
'**/target',
"**/*.class",
"**/*.o",
"**/bin",
"**/*.map",
'**/*.class',
'**/*.o',
'**/bin',
'**/*.map',
// python
"**/*.pyc",
"**/*.pyo",
'**/*.pyc',
'**/*.pyo',
// other
"**/*.dil",
"**/*.ra",
'**/*.dil',
'**/*.ra',
// images
'**/*.png',
@@ -122,29 +121,39 @@ const defaultExclude = [
'**/*.tgz',
];
/**
 * Normalise an option that may be a single string, a list of strings, or
 * missing entirely into a list of strings.
 *
 * @param arr the raw option value
 * @param dfault value to use as the only element when `arr` is missing or empty
 * @returns `arr` itself when it is already an array, a one-element array for
 * a non-empty string, otherwise `[dfault]` (or `[]` when no default is given)
 */
function ensureArray(arr?: string[] | string, dfault?: string) {
  // Arrays (including empty ones) are passed through untouched.
  if (Array.isArray(arr)) {
    return arr;
  }
  // A non-empty string becomes a single-element list.
  if (arr) {
    return [arr];
  }
  // Nothing usable was supplied: fall back to the default, if any.
  return dfault ? [dfault] : [];
}
/**
* Collect the info of a directory.
*/
export class LocDir {
private cwd: string;
private include: string[];
private exclude: string[];
private analysisLanguages?: string[];
private allLanguages = new Languages();
constructor(options: LocDirOptions) {
// ensure all excludes are globstar. Note that '**/*.ts/**' matches files
// that end in .ts because the globstar indicates 0 or more directory paths.
this.exclude = ensureArray(options.exclude)
.concat(defaultExclude)
.map(item => item.endsWith('**') ? item : `${item}/**`);
.map((item) => (item.endsWith('**') ? item : `${item}/**`));
// remove all leading './' since this messes up globstar matches in the
// excludes.
this.include = ensureArray(options.include, '**')
.map(item => item.startsWith('./') ? item.substring(2) : item)
.map(item => item.endsWith('**') ? item : `${item}/**`);
.map((item) => (item.startsWith('./') ? item.substring(2) : item))
.map((item) => (item.endsWith('**') ? item : `${item}/**`));
this.cwd = options.cwd || process.cwd();
this.analysisLanguages = options.analysisLanguages;
}
@@ -156,7 +165,7 @@ export class LocDir {
const paths = await globby(this.include, {
cwd: this.cwd,
ignore: this.exclude,
nodir: true
nodir: true,
});
const files: string[] = [];
const info: LineInfo = { ...defaultInfo };
@@ -168,6 +177,7 @@ export class LocDir {
// We _could_ use Promise.all to count the files in parallel, but that
// would lead to out of memory errors when there are many files.
// eslint-disable-next-line no-restricted-syntax
for (const pathItem of paths) {
const fullPath = slash(path.join(this.cwd, pathItem));
if (
@@ -178,6 +188,7 @@ export class LocDir {
) {
continue;
}
const file = new LocFile(fullPath);
const fileLineInfo = await file.getFileInfo();
const { lines } = fileLineInfo;
@@ -208,15 +219,9 @@ export class LocDir {
* and this language is not one of them.
*/
private ignoreLanguage(pathItem: string): boolean {
return this.analysisLanguages && !this.analysisLanguages.includes(this.allLanguages.getType(pathItem));
return (
this.analysisLanguages &&
!this.analysisLanguages.includes(this.allLanguages.getType(pathItem))
);
}
}
/**
 * Coerce a string-or-list option into a list of strings.
 *
 * @param arr a list of strings, a single string, or nothing
 * @param dfault used as the sole element when `arr` is missing or empty
 * @returns a string array; an array argument is returned as-is
 */
function ensureArray(arr?: string[] | string, dfault?: string) {
  const fallback = dfault ? [dfault] : [];
  if (!arr) {
    return fallback;
  }
  if (typeof arr === 'string') {
    return [arr];
  }
  return arr;
}

View File

@@ -3,8 +3,6 @@
*/
import * as fs from 'fs-extra';
import * as Path from 'path';
// @ts-ignore
import slash from 'slash2';
import { Languages, Regexes } from './languages';
@@ -40,6 +38,7 @@ const DefaultFileInfo: FileInfo = {
*/
export class LocFile {
public path: string;
private rawPath: string;
private languages = new Languages();
@@ -59,18 +58,16 @@ export class LocFile {
const total = codeLength;
let inMultiLineComment = false;
lines.forEach((line) => {
lines.forEach((origLine) => {
let lineType = 'code';
line = line.trim();
const line = origLine.trim();
if (inMultiLineComment) {
let noCode = true;
if (regexes.multiLineCommentClose.test(line)) {
if (regexes.multiLineCommentClose?.test(line)) {
// line contains the end of a multi-line comment
inMultiLineComment = false;
if (!regexes.multiLineCommentCloseEnd.test(line)) {
if (!regexes.multiLineCommentCloseEnd?.test(line)) {
// the multiline comment does not end this line.
// there is real code on it.
noCode = false;
@@ -82,33 +79,29 @@ export class LocFile {
commentLength += 1;
codeLength -= 1;
}
} else if (line) {
// non-empty line
if (regexes.multiLineCommentOpen.test(line)) {
if (regexes.multiLineCommentOpen?.test(line)) {
// line contains the start of a multi-line comment
// might contain some real code, but we'll let that slide
if (!regexes.multiLineCommentOpenAndClose.test(line)) {
if (!regexes.multiLineCommentOpenAndClose?.test(line)) {
// comment is not also closed on this line
inMultiLineComment = true;
}
if (regexes.multiLineCommentOpenStart.test(line)) {
if (regexes.multiLineCommentOpenStart?.test(line)) {
// The comment starts the line. There is no other code on this line
commentLength += 1;
codeLength -= 1;
lineType = 'comm';
}
} else if (regexes.singleLineComment.test(line)) {
} else if (regexes.singleLineComment?.test(line)) {
// line contains only a single line comment
commentLength += 1;
codeLength -= 1;
lineType = 'comm';
}
} else {
// empty line
codeLength -= 1;
@@ -116,7 +109,7 @@ export class LocFile {
}
if (this.debug) {
console.log(lineType, line)
console.log(lineType, line);
}
});
@@ -137,14 +130,16 @@ export class LocFile {
}
let newData = data;
const info: FileInfo = Object.assign({}, DefaultFileInfo);
const name = this.path.split(Path.sep).pop() || '';
const info: FileInfo = { ...DefaultFileInfo };
// note: do not use Path.sep here since we have already
// translated the path to a posix path using slash(...).
const name = this.path.split('/').pop() || '';
try {
const stat = await fs.stat(this.path);
if (!stat.isFile()) {
return info;
}
newData = data || await fs.readFile(this.path, 'utf-8');
newData = data || (await fs.readFile(this.path, 'utf-8'));
info.name = name;
info.size = (stat && stat.size) || 0;
info.languages = this.languages.getType(this.path);
@@ -162,7 +157,7 @@ export class LocFile {
}
public getFileInfoByContent(name: string, data: string): FileInfo {
const info: FileInfo = Object.assign({}, DefaultFileInfo);
const info: FileInfo = { ...DefaultFileInfo };
info.name = name;
info.languages = this.languages.getType(name);
info.lines = this.filterData(data, this.languages.getRegexes(info.languages));

View File

@@ -1,4 +1,3 @@
// @ts-ignore
import slash from 'slash2';
import fs from 'fs-extra';
@@ -8,9 +7,7 @@ import { LocFile } from './file';
export { LocDir, LocDirOptions } from './directory';
export { LocFile, LineInfo } from './file';
const loc = async (
fileOrDir: string,
): Promise<LocResult> => {
const loc = async (fileOrDir: string): Promise<LocResult> => {
const stat = await fs.stat(slash(fileOrDir));
if (stat.isFile()) {
const locFile = new LocFile(slash(fileOrDir));

View File

@@ -1,16 +1,10 @@
import languageMap from 'language-map/languages.json';
import { ExtensionJustify } from './utils';
// tslint:disable-next-line
const languageMap = require('language-map');
// tslint:disable-next-line
// const lang = require('language-classifier');
interface ExtensionsTypes {
[key: string]: string;
}
export interface DetectorOptions {}
/**
* The extension map can contain multiple languages with the same extension,
* but we only want a single one. For the moment, these clashes are resolved
@@ -19,8 +13,116 @@ export interface DetectorOptions {}
* where the extension is ambiguous. The ordering of the list matters and
* languages earlier on will get a higher priority when resolving clashes.
*/
// Entries are lower-case because they are looked up with indexOf against
// lower-cased language names; a smaller index means a higher priority.
const importantLanguages = ["javascript", "typescript", "ruby", "python", "java", "c", "c++", "c#", "rust", "scala", "perl", "go"];
// Entries are lower-case because they are looked up with indexOf against
// lower-cased language names; a smaller index means a higher priority.
const importantLanguages = [
'javascript',
'typescript',
'ruby',
'python',
'java',
'c',
'c++',
'c#',
'rust',
'scala',
'perl',
'go',
];
/**
 * Regular expressions used to classify one trimmed line of source as
 * comment or code. Only `singleLineComment` is required; a language may
 * leave the multi-line entries undefined.
 */
export interface Regexes {
// the line is nothing but a single-line comment
singleLineComment: RegExp;
// the line contains the opening of a multi-line comment
multiLineCommentOpen?: RegExp;
// the line begins with the opening of a multi-line comment
multiLineCommentOpenStart?: RegExp;
// the line contains the close of a multi-line comment
multiLineCommentClose?: RegExp;
// the line ends with the close of a multi-line comment
multiLineCommentCloseEnd?: RegExp;
// a multi-line comment is both opened and closed on this line
multiLineCommentOpenAndClose?: RegExp;
}
/**
 * Comment-detection regexes, keyed by comment-syntax family (the `html`
 * entry is also what getRegexes returns for xml). Every pattern is tested
 * against a single line that has already been trimmed.
 */
const ALL_REGEXES: Record<string, Regexes> = {
c: {
// matches when // are the first two characters of a line
singleLineComment: /^\/\//,
// matches when /* exists in a line
multiLineCommentOpen: /\/\*/,
// matches when /* starts a line
multiLineCommentOpenStart: /^\/\*/,
// matches when */ exists in a line
multiLineCommentClose: /\*\//,
// matches when */ ends a line
multiLineCommentCloseEnd: /\*\/$/,
// matches /* ... */
multiLineCommentOpenAndClose: /\/\*.*\*\//,
},
python: {
// matches when # is the first character of a line
singleLineComment: /^#/,
// The triple-quote patterns below are intentionally disabled. A """ block
// is a multi-line string and not always a comment, so with no multi-line
// patterns defined its lines are counted as code.
// matches when """ exists in a line
// multiLineCommentOpen: /"""/,
// matches when """ starts a line
// multiLineCommentOpenStart: /^"""/,
// matches when """ exists in a line
// multiLineCommentClose: /"""/,
// matches when """ ends a line
// multiLineCommentCloseEnd: /"""$/,
// matches """ ... """
// multiLineCommentOpenAndClose: /""".*"""/,
},
ruby: {
// matches when # is the first character of a line
singleLineComment: /^#/,
// For ruby multiline comments, =begin and =end must be
// on their own lines
// matches when =begin starts a line
multiLineCommentOpen: /^=begin/,
// matches when =begin starts a line
multiLineCommentOpenStart: /^=begin/,
// matches when =end starts a line
multiLineCommentClose: /^=end/,
// matches when =end is the entire line
multiLineCommentCloseEnd: /^=end$/,
// not possible in ruby; placeholder that only matches a line consisting
// of a single NUL character
multiLineCommentOpenAndClose: /^\0$/,
},
html: {
// There is no single line comment; placeholder that only matches a line
// consisting of a single NUL character
singleLineComment: /^\0$/,
// matches when <!-- exists in a line
multiLineCommentOpen: /<!--/,
// matches when <!-- starts a line
multiLineCommentOpenStart: /^<!--/,
// matches when --> exists in a line
multiLineCommentClose: /-->/,
// matches when --> ends a line
multiLineCommentCloseEnd: /-->$/,
// matches <!-- ... -->
multiLineCommentOpenAndClose: /<!--.*-->/,
},
};
/**
* Detect the programming language from the file extension
@@ -51,11 +153,13 @@ export class Languages {
const languageExtensions = (languageMode && languageMode.extensions) || [];
languageExtensions.forEach((extension: string) => {
const lowerCaseExtension = extension.toLowerCase();
const lowerCaseLanguage = language.toLowerCase()
const lowerCaseLanguage = language.toLowerCase();
if (!extensions[lowerCaseExtension]) {
extensions[lowerCaseExtension] = lowerCaseLanguage;
} else {
const currentLanguagePriority = importantLanguages.indexOf(extensions[lowerCaseExtension]);
const currentLanguagePriority = importantLanguages.indexOf(
extensions[lowerCaseExtension],
);
if (currentLanguagePriority === -1) {
extensions[lowerCaseExtension] = lowerCaseLanguage;
} else {
@@ -67,8 +171,8 @@ export class Languages {
});
});
return Object.assign({}, extensions, ExtensionJustify);
}
return { ...extensions, ...ExtensionJustify };
};
/**
* Retrieve the regular expressions for a given language.
@@ -78,7 +182,7 @@ export class Languages {
* @param language the language to retrieve regexes for
*/
public getRegexes(language: string): Regexes {
switch(language) {
switch (language) {
case 'html':
case 'xml':
return ALL_REGEXES.html;
@@ -105,104 +209,8 @@ export class Languages {
/**
* get file type through a path
*/
public getType(path: string): string {
public getType(path: string): string {
const fileExtension = `.${path.split('.').pop()}`;
return this.extensionMap[fileExtension] || '';
}
}
/**
 * Regular expressions used to classify one trimmed line of source as
 * comment or code. Every entry is required in this form of the interface.
 */
export interface Regexes {
// the line is nothing but a single-line comment
singleLineComment: RegExp;
// the line contains the opening of a multi-line comment
multiLineCommentOpen: RegExp;
// the line begins with the opening of a multi-line comment
multiLineCommentOpenStart: RegExp;
// the line contains the close of a multi-line comment
multiLineCommentClose: RegExp;
// the line ends with the close of a multi-line comment
multiLineCommentCloseEnd: RegExp;
// a multi-line comment is both opened and closed on this line
multiLineCommentOpenAndClose: RegExp;
}
/**
 * Comment-detection regexes, keyed by comment-syntax family (the `html`
 * entry is also what getRegexes returns for xml). Every pattern is tested
 * against a single line that has already been trimmed.
 */
const ALL_REGEXES: Record<string, Regexes> = {
c: {
// matches when // are the first two characters of a line
singleLineComment: /^\/\//,
// matches when /* exists in a line
multiLineCommentOpen: /\/\*/,
// matches when /* starts a line
multiLineCommentOpenStart: /^\/\*/,
// matches when */ exists in a line
multiLineCommentClose: /\*\//,
// matches when */ ends a line
multiLineCommentCloseEnd: /\*\/$/,
// matches /* ... */
multiLineCommentOpenAndClose: /\/\*.*\*\//
},
python: {
// matches when # is the first character of a line
singleLineComment: /^#/,
// matches when """ exists in a line. This is not right, since
// a multiline string is not always a comment, but for the
// sake of simplicity, we will do that here.
multiLineCommentOpen: /"""/,
// matches when """ starts a line
multiLineCommentOpenStart: /^"""/,
// matches when """ exists in a line
multiLineCommentClose: /"""/,
// matches when """ ends a line
multiLineCommentCloseEnd: /"""$/,
// matches """ ... """
multiLineCommentOpenAndClose: /""".*"""/
},
ruby: {
// matches when # is the first character of a line
singleLineComment: /^#/,
// For ruby multiline comments, =begin and =end must be
// on their own lines
// matches when =begin starts a line
multiLineCommentOpen: /^=begin/,
// matches when =begin starts a line
multiLineCommentOpenStart: /^=begin/,
// matches when =end starts a line
multiLineCommentClose: /^=end/,
// matches when =end is the entire line
multiLineCommentCloseEnd: /^=end$/,
// not possible in ruby; placeholder that only matches a line consisting
// of a single NUL character
multiLineCommentOpenAndClose: /^\0$/
},
html: {
// There is no single line comment; placeholder that only matches a line
// consisting of a single NUL character
singleLineComment: /^\0$/,
// matches when <!-- exists in a line
multiLineCommentOpen: /<!--/,
// matches when <!-- starts a line
multiLineCommentOpenStart: /^<!--/,
// matches when --> exists in a line
multiLineCommentClose: /-->/,
// matches when --> ends a line
multiLineCommentCloseEnd: /-->$/,
// matches <!-- ... -->
multiLineCommentOpenAndClose: /<!--.*-->/
}
};

View File

@@ -1,6 +1,5 @@
import fs from 'fs';
import path from 'path';
// @ts-ignore
import slash from 'slash2';
const packagePath = slash(path.join(__dirname, '../', 'package.json'));