1
0
mirror of https://git.FreeBSD.org/src.git synced 2026-06-02 11:24:32 +00:00

md5: Encode non-printable filenames

Encode filenames in the VIS_CSTYLE | VIS_OCTAL style regardless of
output mode.  When reading filenames from a checksum file, attempt to
decode them, and use the decoded name unless the decoded name does not
exist but the undecoded one does.

This breaks compatibility with GNU coreutils, which unfortunately uses
a non-reversible encoding when outputting filenames containing
non-printable characters.

While here, drop a sentence about preimage attacks against MD5 and SHA1
from the manual page, as I no longer trust it to be true.

MFC after:	1 week
Reviewed by:	bcr, markj
Differential Revision:	https://reviews.freebsd.org/D56615
This commit is contained in:
Dag-Erling Smørgrav
2026-04-29 20:41:05 +02:00
parent 6883b120c5
commit 70fde0ed6b
2 changed files with 57 additions and 14 deletions
+22 -8
View File
@@ -1,4 +1,4 @@
.Dd March 12, 2024
.Dd April 6, 2026
.Dt MD5 1
.Os
.Sh NAME
@@ -84,11 +84,18 @@ If no files are listed on the command line, or a file name is given as
.Pa - ,
input is taken from stdin instead.
.Pp
The utility's different modes have different output formats, but in
all cases, filenames containing unprintable characters are encoded as
described in
.Xr vis 3
using the
.Dv VIS_CSTYLE \&| Dv VIS_OCTAL
style.
.Pp
It is conjectured that it is computationally infeasible to
produce two messages having the same message digest, or to produce any
message having a given prespecified target message digest.
The SHA-224 , SHA-256 , SHA-384 , SHA-512, RIPEMD-160,
and SKEIN
The SHA-224, SHA-256, SHA-384, SHA-512, RIPEMD-160, and SKEIN
algorithms are intended for digital signature applications, where a
large file must be
.Dq compressed
@@ -99,10 +106,6 @@ key under a public-key cryptosystem such as RSA.
The MD5 and SHA-1 algorithms have been proven to be vulnerable to practical
collision attacks and should not be relied upon to produce unique outputs,
.Em nor should they be used as part of a cryptographic signature scheme.
As of 2017-03-02, there is no publicly known method to
.Em reverse
either algorithm, i.e., to find an input that produces a specific
output.
.Pp
SHA-512t256 is a version of SHA-512 truncated to only 256 bits.
On 64-bit hardware, this algorithm is approximately 50% faster than SHA-256 but
@@ -385,7 +388,8 @@ is printed instead of
.Xr sha256 3 ,
.Xr sha384 3 ,
.Xr sha512 3 ,
.Xr skein 3
.Xr skein 3 ,
.Xr vis 3
.Rs
.%A R. Rivest
.%T The MD5 Message-Digest Algorithm
@@ -411,6 +415,16 @@ Secure Hash Standard (SHS):
.Pp
The RIPEMD-160 page:
.Pa https://homes.esat.kuleuven.be/~bosselae/ripemd160.html
.Sh CAVEATS
The encoding used for file names containing non-printable characters
is incompatible with that used by GNU coreutils.
The encoding used by GNU coreutils is non-reversible, in that certain
non-printable characters are encoded while others are simply omitted.
The encoding used by this utility, on the other hand, is fully
reversible.
.Pp
If interoperability with GNU coreutils is required, it is recommended
to ensure that all file names contain only printable characters.
.Sh BUGS
In bits mode, the original
.Nm shasum
+35 -6
View File
@@ -42,6 +42,7 @@
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <vis.h>
#ifdef HAVE_CAPSICUM
#include <sys/capsicum.h>
@@ -314,6 +315,7 @@ gnu_check(const char *checksumsfile)
const char *digestname;
size_t digestnamelen;
size_t hashstrlen;
size_t filenamelen;
struct stat st;
if (strcmp(checksumsfile, "-") == 0)
@@ -343,12 +345,14 @@ gnu_check(const char *checksumsfile)
strncmp(hashstr - 4, ") = ", 4) == 0 &&
strspn(hashstr, "0123456789ABCDEFabcdef") == hashstrlen) {
*(hashstr - 4) = '\0';
filenamelen = hashstr - 4 - filename;
} else if ((size_t)linelen >= hashstrlen + 3 &&
strspn(linebuf, "0123456789ABCDEFabcdef") == hashstrlen &&
linebuf[hashstrlen] == ' ') {
linebuf[hashstrlen] = '\0';
hashstr = linebuf;
filename = linebuf + hashstrlen + 1;
filenamelen = linelen - hashstrlen - 1;
} else {
if (wflag) {
warnx("%s: %d: improperly formatted "
@@ -365,17 +369,23 @@ gnu_check(const char *checksumsfile)
if ((*filename == '*' || *filename == ' ' ||
*filename == 'U' || *filename == '^') &&
lstat(filename, &st) != 0 &&
lstat(filename + 1, &st) == 0) {
rec->filename = strdup(filename + 1);
lstat(filename, &st) != 0) {
rec->input_mode = (enum input_mode)*filename;
filename++;
filenamelen--;
} else {
rec->filename = strdup(filename);
rec->input_mode = input_mode;
}
rec->filename = malloc(filenamelen + 1);
if (rec->filename == NULL)
errx(1, "malloc failed");
if (strnunvis(rec->filename, filenamelen + 1, filename) < 0 ||
(lstat(rec->filename, &st) != 0 && lstat(filename, &st) == 0))
memcpy(rec->filename, filename, filenamelen + 1); // XXX
rec->chksum = strdup(hashstr);
if (rec->chksum == NULL || rec->filename == NULL)
if (rec->chksum == NULL)
errx(1, "malloc failed");
rec->next = NULL;
*next = rec;
@@ -386,6 +396,15 @@ gnu_check(const char *checksumsfile)
fclose(inp);
}
/*
 * Encode `name` into `namebuf` (capacity `bufsize` bytes) with strnvis(3).
 * The encoding uses the VIS_CSTYLE | VIS_OCTAL style and additionally
 * requests escaping of newlines, tabs, glob characters and shell
 * metacharacters.  The strnvis() result is returned unchanged; callers in
 * this file treat a negative value as failure.
 */
static int
safename(char *namebuf, size_t bufsize, const char *name)
{
	const int flags = VIS_CSTYLE | VIS_OCTAL |
	    VIS_NL | VIS_TAB | VIS_GLOB | VIS_SHELL;

	return (strnvis(namebuf, bufsize, name, flags));
}
/* Main driver.
Arguments (may be any combination):
@@ -633,7 +652,13 @@ main(int argc, char *argv[])
}
if (f == NULL) {
if (errno != ENOENT || !(cflag && ignoreMissing)) {
warn("%s", filename);
char namebuf[MAXPATHLEN * 4];
int serrno = errno;
if (safename(namebuf, sizeof(namebuf), filename) < 0)
warnc(ENAMETOOLONG, NULL);
else
warnc(serrno, "%s", namebuf);
failed = true;
}
continue;
@@ -763,8 +788,12 @@ MDInput(const Algorithm_t *alg, FILE *f, char *buf, bool tee)
static void
MDOutput(const Algorithm_t *alg, char *p, const char *name)
{
char namebuf[MAXPATHLEN * 4];
bool checkfailed = false;
if (safename(namebuf, sizeof(namebuf), name) < 0)
errc(1, ENAMETOOLONG, NULL);
name = namebuf;
if (p == NULL) {
warn("%s", name);
failed = true;