Discussion:
[PATCH] posix: if glob has a trailing slash match directories only.
Add Reply
Jonathan Nieder
2017-11-28 22:04:53 UTC
Reply
Permalink
Raw Message
If pattern has a trailing slash match directories only.
This patch fixes [BZ #22513].
+
+ [BZ #22513]
+ * posix/glob.c (glob_in_dir): Make glob with a trailing slash match
+ directores only.
+ * posix/globtest.sh: Add tests.
Tested on x86-64 linux.
Thanks. Looks like a reasonable thing to do.
diff --git a/posix/glob.c b/posix/glob.c
index cb39779d07..78873d83c6 100644
--- a/posix/glob.c
+++ b/posix/glob.c
@@ -1342,7 +1342,33 @@ glob_in_dir (const char *pattern, const char *directory, int flags,
if (flags & GLOB_ONLYDIR)
switch (readdir_result_type (d))
{
- case DT_DIR: case DT_LNK: case DT_UNKNOWN: break;
+ case DT_DIR: case DT_LNK: break;
+ {
+ int dir;
+ size_t namlen = strlen (d.name);
+ size_t fullsize;
+ bool alloca_fullname
+ = (! size_add_wrapv (dirlen + 1, namlen + 1, &fullsize)
+ && glob_use_alloca (alloca_used, fullsize));
+ char *fullname;
+ if (alloca_fullname)
+ fullname = alloca_account (fullsize, alloca_used);
+ else
+ {
+ fullname = malloc (fullsize);
+ if (fullname == NULL)
+ return GLOB_NOSPACE;
+ }
+ mempcpy (mempcpy (mempcpy (fullname, directory, dirlen),
+ "/", 1),
+ d.name, namlen + 1);
+ dir = is_dir (fullname, flags, pglob);
+ if (__glibc_unlikely (!alloca_fullname))
+ free (fullname);
+ if (dir)
+ break;
+ }
default: continue;
}
If I understand correctly, this treats DT_UNKNOWN like DT_DIR when stat
indicates that it is a directory.

What should happen in the DT_LNK case? Should the same logic trip for
it as well so we can distinguish between a symlink to a directory and
other symlinks?
diff --git a/posix/globtest.sh b/posix/globtest.sh
index 73f7ae31cc..4f0c03715a 100755
--- a/posix/globtest.sh
+++ b/posix/globtest.sh
@@ -43,13 +43,19 @@ export LC_ALL
# Create the arena
testdir=${common_objpfx}posix/globtest-dir
+testdir2=${common_objpfx}posix/globtest-dir2
testout=${common_objpfx}posix/globtest-out
rm -rf $testdir $testout
mkdir $testdir
+mkdir $testdir2
+mkdir $testdir2/hello1d
+mkdir $testdir2/hello2d
+echo 1 > $testdir2/hello1f
+echo 2 > $testdir2/hello2f
cleanup() {
chmod 777 $testdir/noread
- rm -fr $testdir $testout
+ rm -fr $testdir $testdir2 $testout
}
trap cleanup 0 HUP INT QUIT TERM
@@ -815,6 +821,41 @@ if test $failed -ne 0; then
result=1
fi
+# Test that if the specified glob ends with a slash then only directories are
+# matched.
+# First run with the glob that has no slash to demonstrate presence of a slash
+# makes a difference.
+failed=0
+${test_program_prefix} \
+${common_objpfx}posix/globtest "$testdir2" "hello*" |
+sort > $testout
+cat <<"EOF" | $CMP - $testout >> $logfile || failed=1
+`hello1d'
+`hello1f'
+`hello2d'
+`hello2f'
+EOF
+
+if test $failed -ne 0; then
+ echo "pattern+meta test failed" >> $logfile
+ result=1
+fi
+
+# The same pattern+meta test with a slash this time.
+failed=0
+${test_program_prefix} \
+${common_objpfx}posix/globtest "$testdir2" "hello*/" |
+sort > $testout
+cat <<"EOF" | $CMP - $testout >> $logfile || failed=1
+`hello1d/'
+`hello2d/'
+EOF
+
+if test $failed -ne 0; then
+ echo "pattern+meta+slash test failed" >> $logfile
+ result=1
+fi
+
if test $result -eq 0; then
echo "All OK." > $logfile
fi
Thanks for the test. It does a good job of motivating the change.

Thanks and hope that helps,
Jonathan
Dmitry Goncharov
2017-11-29 22:05:34 UTC
Reply
Permalink
Raw Message
Post by Jonathan Nieder
If I understand correctly, this treats DT_UNKNOWN like DT_DIR when stat
indicates that it is a directory.
The patch checks if an entry is really a dir if type is DT_UNKNOWN.
Post by Jonathan Nieder
What should happen in the DT_LNK case? Should the same logic trip for
it as well so we can distinguish between a symlink to a directory and
other symlinks?
You are right. i'll add a test and post another patch.


regards, Dmitry
Dmitry Goncharov
2017-11-30 05:01:19 UTC
Reply
Permalink
Raw Message
Post by Jonathan Nieder
What should happen in the DT_LNK case? Should the same logic trip for
it as well so we can distinguish between a symlink to a directory and
other symlinks?
The filesystems that i tested on all return DT_UNKNOWN for a symlink or
hardlink. Even when they return proper type for a file or directory.
However, if the filesystem ever sets type to DT_LNK glob needs to treat it like
DT_UNKNOWN.

regards, Dmitry

diff --git a/ChangeLog b/ChangeLog
index ffeb208132..fb76fc1415 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2017-11-28 Dmitry Goncharov <***@users.sf.net>
+
+ [BZ #22513]
+ * posix/glob.c (glob_in_dir): Make pattern with a trailing slash
+ match directores only.
+ * posix/globtest.sh: Add tests.
+
2017-11-13 Florian Weimer <***@redhat.com>

* support/next_to_fault.h, support/next_to_fault.c: New files.
diff --git a/posix/glob.c b/posix/glob.c
index cb39779d07..47e67907cd 100644
--- a/posix/glob.c
+++ b/posix/glob.c
@@ -1342,7 +1342,33 @@ glob_in_dir (const char *pattern, const char *directory, int flags,
if (flags & GLOB_ONLYDIR)
switch (readdir_result_type (d))
{
- case DT_DIR: case DT_LNK: case DT_UNKNOWN: break;
+ case DT_DIR: break;
+ case DT_LNK: case DT_UNKNOWN:
+ {
+ int dir;
+ size_t namlen = strlen (d.name);
+ size_t fullsize;
+ bool alloca_fullname
+ = (! size_add_wrapv (dirlen + 1, namlen + 1, &fullsize)
+ && glob_use_alloca (alloca_used, fullsize));
+ char *fullname;
+ if (alloca_fullname)
+ fullname = alloca_account (fullsize, alloca_used);
+ else
+ {
+ fullname = malloc (fullsize);
+ if (fullname == NULL)
+ return GLOB_NOSPACE;
+ }
+ mempcpy (mempcpy (mempcpy (fullname, directory, dirlen),
+ "/", 1),
+ d.name, namlen + 1);
+ dir = is_dir (fullname, flags, pglob);
+ if (__glibc_unlikely (!alloca_fullname))
+ free (fullname);
+ if (dir)
+ break;
+ }
default: continue;
}

diff --git a/posix/globtest.sh b/posix/globtest.sh
index 73f7ae31cc..4a062ea507 100755
--- a/posix/globtest.sh
+++ b/posix/globtest.sh
@@ -43,13 +43,22 @@ export LC_ALL

# Create the arena
testdir=${common_objpfx}posix/globtest-dir
+testdir2=${common_objpfx}posix/globtest-dir2
testout=${common_objpfx}posix/globtest-out
rm -rf $testdir $testout
mkdir $testdir
+mkdir $testdir2
+mkdir $testdir2/hello1d
+ln -s $testdir2/hello1d $testdir2/hello1ds
+mkdir $testdir2/hello2d
+echo 1 > $testdir2/hello1f
+ln -s $testdir2/hello1f $testdir2/hello1fs
+echo 2 > $testdir2/hello2f
+ln $testdir2/hello2f $testdir2/hello2fl

cleanup() {
chmod 777 $testdir/noread
- rm -fr $testdir $testout
+ rm -fr $testdir $testdir2 $testout
}

trap cleanup 0 HUP INT QUIT TERM
@@ -815,6 +824,45 @@ if test $failed -ne 0; then
result=1
fi

+# Test that if the specified glob ends with a slash then only directories are
+# matched.
+# First run with the glob that has no slash to demonstrate presence of a slash
+# makes a difference.
+failed=0
+${test_program_prefix} \
+${common_objpfx}posix/globtest "$testdir2" "hello*" |
+sort > $testout
+cat <<"EOF" | $CMP - $testout >> $logfile || failed=1
+`hello1d'
+`hello1ds'
+`hello1f'
+`hello1fs'
+`hello2d'
+`hello2f'
+`hello2fl'
+EOF
+
+if test $failed -ne 0; then
+ echo "pattern+meta test failed" >> $logfile
+ result=1
+fi
+
+# The same pattern+meta test with a slash this time.
+failed=0
+${test_program_prefix} \
+${common_objpfx}posix/globtest "$testdir2" "hello*/" |
+sort > $testout
+cat <<"EOF" | $CMP - $testout >> $logfile || failed=1
+`hello1d/'
+`hello1ds/'
+`hello2d/'
+EOF
+
+if test $failed -ne 0; then
+ echo "pattern+meta+slash test failed" >> $logfile
+ result=1
+fi
+
if test $result -eq 0; then
echo "All OK." > $logfile
fi
Dmitry Goncharov
2017-12-04 04:20:23 UTC
Reply
Permalink
Raw Message
That would be a better approach. Another possibility might be to change
    flags |= GLOB_ONLYDIR | GLOB_MARK;
and then at the end, filter out all matches that aren't marked with
trailing '/'. This would avoid creating a new GLOB_XXX option and would
probably be easier to implement.
Please have a look at this implementation of your idea.

regards, Dmitry

diff --git a/ChangeLog b/ChangeLog
index ffeb208132..1b7e3984d2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2017-11-28 Dmitry Goncharov <***@users.sf.net>
+
+ [BZ #22513]
+ * posix/glob.c (glob_in_dir): Make pattern with a trailing slash
+ match directores only.
+ * posix/globtest.sh: Add tests.
+ * posix/globtest.c: Print gl_pathv before errors.
+
2017-11-13 Florian Weimer <***@redhat.com>

* support/next_to_fault.h, support/next_to_fault.c: New files.
diff --git a/posix/glob.c b/posix/glob.c
index cb39779d07..10756f1f92 100644
--- a/posix/glob.c
+++ b/posix/glob.c
@@ -1124,8 +1124,11 @@ __glob (const char *pattern, int flags, int (*errfunc) (const char *, int),
{
/* Append slashes to directory names. */
size_t i;
+ int filter;
+ filter = 0;

for (i = oldcount; i < pglob->gl_pathc + pglob->gl_offs; ++i)
+ {
if (is_dir (pglob->gl_pathv[i], flags, pglob))
{
size_t len = strlen (pglob->gl_pathv[i]) + 2;
@@ -1140,6 +1143,77 @@ __glob (const char *pattern, int flags, int (*errfunc) (const char *, int),
strcpy (&new[len - 2], "/");
pglob->gl_pathv[i] = new;
}
+ else if (flags & GLOB_ONLYDIR)
+ {
+ /* Either the caller specified GLOB_MARK and GLOB_ONLYDIR or the
+ caller specified the pattern that has a trailing slash.
+ Filter out everything, but directories. */
+ free (pglob->gl_pathv[i]);
+ pglob->gl_pathv[i] = NULL;
+ filter = 1;
+ }
+ }
+ if (filter)
+ {
+ /* After the first pass above gl_pathv has full elements (directories)
+ and empty elements.
+ This algorithm partitions gl_pathv to have all full elements in the
+ beginning followed by all empty elements at the end and then sets
+ gl_pathc to the number of the full elements.
+
+ Assume the case of 2 elements offsetted by the caller and 2 more matches
+ found by prior calls to glob with GLOB_APPEND and 3 more matches found
+ this time.
+ offs = 2, oldcount = 4, pathc = 5.
+
+ Initial position.
+ gl_pathv e f
+ | | |
+ v v v
+ [ ][ ] [ ][ ][ ][ ][ ]
+ <----><-------------->
+ offs pathc
+ <----------->
+ oldcount
+ From this position e will be advancing to the next empty element
+ and f will be backtracking to the last full element. At each
+ iteration contents of e and f will be swapped.
+ */
+
+ size_t n;
+ char **e, **f;
+ n = pglob->gl_pathc + pglob->gl_offs;
+ assert (oldcount < n); /* Otherwise filter'd be 0. */
+ /* f points at the last element of gl_pathv. */
+ f = pglob->gl_pathv + n - 1;
+ /* e points at gl_pathv[oldcount]. */
+ e = pglob->gl_pathv + oldcount;
+ assert (f >= e);
+ assert (f >= pglob->gl_pathv); /* Otherwise filter'd be 0. */
+ for (;;)
+ {
+ assert (f >= pglob->gl_pathv);
+ assert (e < pglob->gl_pathv + n);
+ /* Advance e to point at the first empty element. */
+ while (*e != NULL && e < f)
+ ++e;
+ if (e >= f)
+ break;
+ /* Backtrack f to point at the last full element. */
+ while (*f == NULL && f > e)
+ --f;
+ if (e >= f)
+ break;
+ assert (f > e);
+ *e = *f;
+ *f = NULL;
+ }
+ /* If there are full elements in gl_pathv then f points at the last
+ one. Otherwise f points at gl_pathv[oldcount]. */
+ pglob->gl_pathc = f - pglob->gl_pathv - pglob->gl_offs;
+ if (pglob->gl_pathc == 0)
+ retval = GLOB_NOMATCH;
+ }
}

if (!(flags & GLOB_NOSORT))
diff --git a/posix/globtest.c b/posix/globtest.c
index 878ae33044..0a94550d26 100644
--- a/posix/globtest.c
+++ b/posix/globtest.c
@@ -93,14 +93,6 @@ main (int argc, char *argv[])
glob_flags |= GLOB_APPEND;
}

- /* Was there an error? */
- if (i == GLOB_NOSPACE)
- puts ("GLOB_NOSPACE");
- else if (i == GLOB_ABORTED)
- puts ("GLOB_ABORTED");
- else if (i == GLOB_NOMATCH)
- puts ("GLOB_NOMATCH");
-
/* If we set an offset, fill in the first field.
(Unless glob() has filled it in already - which is an error) */
if ((glob_flags & GLOB_DOOFFS) && g.gl_pathv[0] == NULL)
@@ -109,10 +101,25 @@ main (int argc, char *argv[])
/* Print out the names. Unless otherwise specified, qoute them. */
if (g.gl_pathv)
{
- for (i = 0; i < g.gl_offs + g.gl_pathc; ++i)
+ int k;
+ for (k = 0; k < g.gl_offs + g.gl_pathc; ++k)
printf ("%s%s%s\n", quotes ? "`" : "",
- g.gl_pathv[i] ? g.gl_pathv[i] : "(null)",
+ g.gl_pathv[k] ? g.gl_pathv[k] : "(null)",
quotes ? "'" : "");
+
}
+
+ /* Was there an error?
+ * Printing the error after printing g.gl_pathv simplifies the test suite,
+ * because when -o is used 'abc' is always printed first regardless of
+ * whether the pattern matches anyting.
+ */
+ if (i == GLOB_NOSPACE)
+ puts ("GLOB_NOSPACE");
+ else if (i == GLOB_ABORTED)
+ puts ("GLOB_ABORTED");
+ else if (i == GLOB_NOMATCH)
+ puts ("GLOB_NOMATCH");
+
return 0;
}
diff --git a/posix/globtest.sh b/posix/globtest.sh
index 73f7ae31cc..bc86fcc232 100755
--- a/posix/globtest.sh
+++ b/posix/globtest.sh
@@ -43,13 +43,22 @@ export LC_ALL

# Create the arena
testdir=${common_objpfx}posix/globtest-dir
+tdir2=${common_objpfx}posix/globtest-dir2
testout=${common_objpfx}posix/globtest-out
-rm -rf $testdir $testout
+rm -rf $testdir $tdir2 $testout
mkdir $testdir
+mkdir $tdir2
+mkdir $tdir2/hello1d
+ln -s $tdir2/hello1d $tdir2/hello1ds
+mkdir $tdir2/hello2d
+echo 1 > $tdir2/hello1f
+ln -s $tdir2/hello1f $tdir2/hello1fs
+echo 2 > $tdir2/hello2f
+ln $tdir2/hello2f $tdir2/hello2fl

cleanup() {
chmod 777 $testdir/noread
- rm -fr $testdir $testout
+ rm -fr $testdir $tdir2 $testout
}

trap cleanup 0 HUP INT QUIT TERM
@@ -74,6 +83,30 @@ echo 1_1 > $testdir/dir1/file1_1
echo 1_2 > $testdir/dir1/file1_2
ln -fs dir1 $testdir/link1

+trailing_slash_test()
+{
+ local dir="$1"
+ local pattern="$2"
+ local expected="$3"
+ failed=0
+ ${test_program_prefix} \
+ ${common_objpfx}posix/globtest -q "$dir" "$pattern" | sort -fd > $testout
+ echo -e "$expected" | $CMP - $testout >> $logfile || failed=1
+ if test $failed -ne 0; then
+ echo "$pattern test failed" >> $logfile
+ result=1
+ fi
+
+ failed=0
+ ${test_program_prefix} \
+ ${common_objpfx}posix/globtest -qo "$dir" "$pattern" | sort -fd > $testout
+ echo -e "abc\n$expected" | $CMP - $testout >> $logfile || failed=1
+ if test $failed -ne 0; then
+ echo "$pattern with dooffs test failed" >> $logfile
+ result=1
+ fi
+}
+
# Run some tests.
result=0
rm -f $logfile
@@ -692,6 +725,22 @@ if test $failed -ne 0; then
result=1
fi

+# Try multiple patterns (GLOB_APPEND) with offset (GLOB_DOOFFS)
+# The pattern has a trailing slash which eliminates files in the
+# subdirectories.
+failed=0
+${test_program_prefix} \
+${common_objpfx}posix/globtest -o "$testdir" "file1" "*/*/" |
+sort > $testout
+cat <<"EOF" | $CMP - $testout >> $logfile || failed=1
+`abc'
+`file1'
+EOF
+if test $failed -ne 0; then
+ echo "GLOB_APPEND2 with slash test failed" >> $logfile
+ result=1
+fi
+
# Test NOCHECK with non-existing file in subdir.
failed=0
${test_program_prefix} \
@@ -815,6 +864,93 @@ if test $failed -ne 0; then
result=1
fi

+# This code tests the algo that filters out non directories when the pattern
+# has a trailing slash.
+# There are at least the following cases
+# - The pattern matches files and directories.
+# - The pattern matches nothing.
+# - The pattern matches only files.
+# - The pattern matches 1 file.
+# - The pattern matches only directories.
+# - The pattern matches 1 directory.
+# Each test is run twice, with and without a trailing slash to demonstrate
+# the differentce that the slash makes.
+# Each test is also run from the target directory by doing chdir and from the
+# current directory by specifying the full path in the pattern.
+# Each test is also run with and without dooffs.
+
+# The pattern matches files and directories.
+pattern='hello*'
+expected="hello1d\nhello1ds\nhello1f\nhello1fs\nhello2d\nhello2f\nhello2fl"
+trailing_slash_test "$tdir2" "$pattern" "$expected"
+
+expected="hello1d/\nhello1ds/\nhello2d/"
+trailing_slash_test "$tdir2" "$pattern/" "$expected"
+
+pattern="$tdir2/hello*"
+expected="$tdir2/hello1d\n$tdir2/hello1ds\n$tdir2/hello1f\n\
+$tdir2/hello1fs\n$tdir2/hello2d\n$tdir2/hello2f\n$tdir2/hello2fl"
+trailing_slash_test . "$pattern" "$expected"
+
+expected="$tdir2/hello1d/\n$tdir2/hello1ds/\n$tdir2/hello2d/"
+trailing_slash_test . "$pattern/" "$expected"
+
+# The pattern matches nothing.
+pattern='nomatch*'
+trailing_slash_test "$tdir2" "$pattern" GLOB_NOMATCH
+trailing_slash_test "$tdir2" "$pattern/" GLOB_NOMATCH
+trailing_slash_test . "$pattern" GLOB_NOMATCH
+trailing_slash_test . "$pattern/" GLOB_NOMATCH
+
+# The pattern matches only files.
+pattern='hello?f*'
+expected="hello1f\nhello1fs\nhello2f\nhello2fl"
+trailing_slash_test "$tdir2" "$pattern" "$expected"
+trailing_slash_test "$tdir2" "$pattern/" GLOB_NOMATCH
+
+pattern="$tdir2/hello?f*"
+expected="$tdir2/hello1f\n$tdir2/hello1fs\n$tdir2/hello2f\n$tdir2/hello2fl"
+trailing_slash_test . "$pattern" "$expected"
+trailing_slash_test . "$pattern/" GLOB_NOMATCH
+
+# The pattern matches only 1 file.
+pattern='hello*fl'
+expected="hello2fl"
+trailing_slash_test "$tdir2" "$pattern" "$expected"
+trailing_slash_test "$tdir2" "$pattern/" GLOB_NOMATCH
+
+pattern="$tdir2/hello*fl"
+expected="$tdir2/hello2fl"
+trailing_slash_test . "$pattern" "$expected"
+trailing_slash_test . "$pattern/" GLOB_NOMATCH
+
+# The pattern matches only directores.
+pattern='hello?d*'
+expected="hello1d\nhello1ds\nhello2d"
+trailing_slash_test "$tdir2" "$pattern" "$expected"
+
+expected="hello1d/\nhello1ds/\nhello2d/"
+trailing_slash_test "$tdir2" "$pattern/" "$expected"
+
+pattern="$tdir2/hello?d*"
+expected="$tdir2/hello1d\n$tdir2/hello1ds\n$tdir2/hello2d"
+trailing_slash_test . "$pattern" "$expected"
+
+expected="$tdir2/hello1d/\n$tdir2/hello1ds/\n$tdir2/hello2d/"
+trailing_slash_test . "$pattern/" "$expected"
+
+# The pattern matches only 1 directory.
+pattern='hello*ds'
+expected="hello1ds"
+trailing_slash_test "$tdir2" "$pattern" "$expected"
+trailing_slash_test "$tdir2" "$pattern/" "$expected/"
+
+pattern="$tdir2/hello*ds"
+expected="$tdir2/hello1ds"
+trailing_slash_test . "$pattern" "$expected"
+trailing_slash_test . "$pattern/" "$expected/"
+
+
if test $result -eq 0; then
echo "All OK." > $logfile
fi
Paul Eggert
2017-12-04 08:01:04 UTC
Reply
Permalink
Raw Message
Post by Dmitry Goncharov
    flags |= GLOB_ONLYDIR | GLOB_MARK;
and then at the end, filter out all matches that aren't marked with
trailing '/'. This would avoid creating a new GLOB_XXX option and would
probably be easier to implement.
Please have a look at this implementation of your idea.
I'm not quite following how it's an implementation, since I don't see where it
does anything like "flags |= GLOB_ONLYDIR | GLOB_MARK;". Maybe there's another
part of the patch you're missing?

The variable "filter" is a boolean and should be of type bool.

That comment and code look over-complicated. Can't you simply copy nonnull
entries in-place, in a single pass? That way, the filtered order will be the
same as the original order.
Dmitry Goncharov
2017-12-04 15:46:23 UTC
Reply
Permalink
Raw Message
Post by Paul Eggert
Post by Dmitry Goncharov
flags |= GLOB_ONLYDIR | GLOB_MARK;
and then at the end, filter out all matches that aren't marked with
trailing '/'. This would avoid creating a new GLOB_XXX option and would
probably be easier to implement.
Please have a look at this implementation of your idea.
I'm not quite following how it's an implementation, since I don't see where
it does anything like "flags |= GLOB_ONLYDIR | GLOB_MARK;". Maybe there's
another part of the patch you're missing?
glob sets GLOB_MARK when it calls itself recursively when the pattern
has a trailing slash.
Post by Paul Eggert
The variable "filter" is a boolean and should be of type bool.
sure
Post by Paul Eggert
That comment and code look over-complicated.
i agree. Will remove the comment.
Post by Paul Eggert
Can't you simply copy nonnull entries in-place, in a single pass?
The first pass calls is_dir and frees and resets files. Do you mean to
also do copying in this very pass?
Post by Paul Eggert
That way, the filtered order will be the same as the original order.
The stable algorithm results in more iterations and more copying. Are you sure?

regards, Dmitry
Paul Eggert
2017-12-04 19:32:06 UTC
Reply
Permalink
Raw Message
Post by Dmitry Goncharov
Post by Paul Eggert
Can't you simply copy nonnull entries in-place, in a single pass?
The first pass calls is_dir and frees and resets files. Do you mean to
also do copying in this very pass?
I would think that would be easy, yes.
Post by Dmitry Goncharov
Post by Paul Eggert
That way, the filtered order will be the same as the original order.
The stable algorithm results in more iterations and more copying. Are you sure?
Sorry, I'm not seeing why an extra iteration would be needed. I expect
users would prefer that this bug-fix does not alter the order of
returned values.
Dmitry Goncharov
2017-12-05 04:58:03 UTC
Reply
Permalink
Raw Message
Post by Paul Eggert
Post by Dmitry Goncharov
The stable algorithm results in more iterations and more copying. Are you sure?
Sorry, I'm not seeing why an extra iteration would be needed. I expect
users would prefer that this bug-fix does not alter the order of
returned values.
i was comparing 2 pass algorithms.
Please have a look at this one pass implementaion.

regards, Dmitry

diff --git a/ChangeLog b/ChangeLog
index ffeb208132..1b7e3984d2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2017-11-28 Dmitry Goncharov <***@users.sf.net>
+
+ [BZ #22513]
+ * posix/glob.c (glob_in_dir): Make pattern with a trailing slash
+ match directores only.
+ * posix/globtest.sh: Add tests.
+ * posix/globtest.c: Print gl_pathv before errors.
+
2017-11-13 Florian Weimer <***@redhat.com>

* support/next_to_fault.h, support/next_to_fault.c: New files.
diff --git a/posix/glob.c b/posix/glob.c
index cb39779d07..c63d628c59 100644
--- a/posix/glob.c
+++ b/posix/glob.c
@@ -1123,9 +1123,10 @@ __glob (const char *pattern, int flags, int (*errfunc) (const char *, int),
if (flags & GLOB_MARK)
{
/* Append slashes to directory names. */
- size_t i;
+ size_t i, e;

- for (i = oldcount; i < pglob->gl_pathc + pglob->gl_offs; ++i)
+ for (i = e = oldcount; i < pglob->gl_pathc + pglob->gl_offs; ++i)
+ {
if (is_dir (pglob->gl_pathv[i], flags, pglob))
{
size_t len = strlen (pglob->gl_pathv[i]) + 2;
@@ -1138,8 +1139,26 @@ __glob (const char *pattern, int flags, int (*errfunc) (const char *, int),
goto out;
}
strcpy (&new[len - 2], "/");
+ if (pglob->gl_pathv[e] == NULL)
+ {
+ pglob->gl_pathv[e++] = new;
+ pglob->gl_pathv[i] = NULL;
+ }
+ else
pglob->gl_pathv[i] = new;
}
+ else if (flags & GLOB_ONLYDIR)
+ {
+ free (pglob->gl_pathv[i]);
+ pglob->gl_pathv[i] = NULL;
+ if (pglob->gl_pathv[e] != NULL)
+ e = i;
+ }
+ }
+ if (pglob->gl_pathv[e] == NULL)
+ pglob->gl_pathc = e - pglob->gl_offs;
+ if (pglob->gl_pathc == 0)
+ retval = GLOB_NOMATCH;
}

if (!(flags & GLOB_NOSORT))
diff --git a/posix/globtest.c b/posix/globtest.c
index 878ae33044..0a94550d26 100644
--- a/posix/globtest.c
+++ b/posix/globtest.c
@@ -93,14 +93,6 @@ main (int argc, char *argv[])
glob_flags |= GLOB_APPEND;
}

- /* Was there an error? */
- if (i == GLOB_NOSPACE)
- puts ("GLOB_NOSPACE");
- else if (i == GLOB_ABORTED)
- puts ("GLOB_ABORTED");
- else if (i == GLOB_NOMATCH)
- puts ("GLOB_NOMATCH");
-
/* If we set an offset, fill in the first field.
(Unless glob() has filled it in already - which is an error) */
if ((glob_flags & GLOB_DOOFFS) && g.gl_pathv[0] == NULL)
@@ -109,10 +101,25 @@ main (int argc, char *argv[])
/* Print out the names. Unless otherwise specified, qoute them. */
if (g.gl_pathv)
{
- for (i = 0; i < g.gl_offs + g.gl_pathc; ++i)
+ int k;
+ for (k = 0; k < g.gl_offs + g.gl_pathc; ++k)
printf ("%s%s%s\n", quotes ? "`" : "",
- g.gl_pathv[i] ? g.gl_pathv[i] : "(null)",
+ g.gl_pathv[k] ? g.gl_pathv[k] : "(null)",
quotes ? "'" : "");
+
}
+
+ /* Was there an error?
+ * Printing the error after printing g.gl_pathv simplifies the test suite,
+ * because when -o is used 'abc' is always printed first regardless of
+ * whether the pattern matches anyting.
+ */
+ if (i == GLOB_NOSPACE)
+ puts ("GLOB_NOSPACE");
+ else if (i == GLOB_ABORTED)
+ puts ("GLOB_ABORTED");
+ else if (i == GLOB_NOMATCH)
+ puts ("GLOB_NOMATCH");
+
return 0;
}
diff --git a/posix/globtest.sh b/posix/globtest.sh
index 73f7ae31cc..bc86fcc232 100755
--- a/posix/globtest.sh
+++ b/posix/globtest.sh
@@ -43,13 +43,22 @@ export LC_ALL

# Create the arena
testdir=${common_objpfx}posix/globtest-dir
+tdir2=${common_objpfx}posix/globtest-dir2
testout=${common_objpfx}posix/globtest-out
-rm -rf $testdir $testout
+rm -rf $testdir $tdir2 $testout
mkdir $testdir
+mkdir $tdir2
+mkdir $tdir2/hello1d
+ln -s $tdir2/hello1d $tdir2/hello1ds
+mkdir $tdir2/hello2d
+echo 1 > $tdir2/hello1f
+ln -s $tdir2/hello1f $tdir2/hello1fs
+echo 2 > $tdir2/hello2f
+ln $tdir2/hello2f $tdir2/hello2fl

cleanup() {
chmod 777 $testdir/noread
- rm -fr $testdir $testout
+ rm -fr $testdir $tdir2 $testout
}

trap cleanup 0 HUP INT QUIT TERM
@@ -74,6 +83,30 @@ echo 1_1 > $testdir/dir1/file1_1
echo 1_2 > $testdir/dir1/file1_2
ln -fs dir1 $testdir/link1

+trailing_slash_test()
+{
+ local dir="$1"
+ local pattern="$2"
+ local expected="$3"
+ failed=0
+ ${test_program_prefix} \
+ ${common_objpfx}posix/globtest -q "$dir" "$pattern" | sort -fd > $testout
+ echo -e "$expected" | $CMP - $testout >> $logfile || failed=1
+ if test $failed -ne 0; then
+ echo "$pattern test failed" >> $logfile
+ result=1
+ fi
+
+ failed=0
+ ${test_program_prefix} \
+ ${common_objpfx}posix/globtest -qo "$dir" "$pattern" | sort -fd > $testout
+ echo -e "abc\n$expected" | $CMP - $testout >> $logfile || failed=1
+ if test $failed -ne 0; then
+ echo "$pattern with dooffs test failed" >> $logfile
+ result=1
+ fi
+}
+
# Run some tests.
result=0
rm -f $logfile
@@ -692,6 +725,22 @@ if test $failed -ne 0; then
result=1
fi

+# Try multiple patterns (GLOB_APPEND) with offset (GLOB_DOOFFS)
+# The pattern has a trailing slash which eliminates files in the
+# subdirectories.
+failed=0
+${test_program_prefix} \
+${common_objpfx}posix/globtest -o "$testdir" "file1" "*/*/" |
+sort > $testout
+cat <<"EOF" | $CMP - $testout >> $logfile || failed=1
+`abc'
+`file1'
+EOF
+if test $failed -ne 0; then
+ echo "GLOB_APPEND2 with slash test failed" >> $logfile
+ result=1
+fi
+
# Test NOCHECK with non-existing file in subdir.
failed=0
${test_program_prefix} \
@@ -815,6 +864,93 @@ if test $failed -ne 0; then
result=1
fi

+# This code tests the algo that filters out non directories when the pattern
+# has a trailing slash.
+# There are at least the following cases
+# - The pattern matches files and directories.
+# - The pattern matches nothing.
+# - The pattern matches only files.
+# - The pattern matches 1 file.
+# - The pattern matches only directories.
+# - The pattern matches 1 directory.
+# Each test is run twice, with and without a trailing slash to demonstrate
+# the differentce that the slash makes.
+# Each test is also run from the target directory by doing chdir and from the
+# current directory by specifying the full path in the pattern.
+# Each test is also run with and without dooffs.
+
+# The pattern matches files and directories.
+pattern='hello*'
+expected="hello1d\nhello1ds\nhello1f\nhello1fs\nhello2d\nhello2f\nhello2fl"
+trailing_slash_test "$tdir2" "$pattern" "$expected"
+
+expected="hello1d/\nhello1ds/\nhello2d/"
+trailing_slash_test "$tdir2" "$pattern/" "$expected"
+
+pattern="$tdir2/hello*"
+expected="$tdir2/hello1d\n$tdir2/hello1ds\n$tdir2/hello1f\n\
+$tdir2/hello1fs\n$tdir2/hello2d\n$tdir2/hello2f\n$tdir2/hello2fl"
+trailing_slash_test . "$pattern" "$expected"
+
+expected="$tdir2/hello1d/\n$tdir2/hello1ds/\n$tdir2/hello2d/"
+trailing_slash_test . "$pattern/" "$expected"
+
+# The pattern matches nothing.
+pattern='nomatch*'
+trailing_slash_test "$tdir2" "$pattern" GLOB_NOMATCH
+trailing_slash_test "$tdir2" "$pattern/" GLOB_NOMATCH
+trailing_slash_test . "$pattern" GLOB_NOMATCH
+trailing_slash_test . "$pattern/" GLOB_NOMATCH
+
+# The pattern matches only files.
+pattern='hello?f*'
+expected="hello1f\nhello1fs\nhello2f\nhello2fl"
+trailing_slash_test "$tdir2" "$pattern" "$expected"
+trailing_slash_test "$tdir2" "$pattern/" GLOB_NOMATCH
+
+pattern="$tdir2/hello?f*"
+expected="$tdir2/hello1f\n$tdir2/hello1fs\n$tdir2/hello2f\n$tdir2/hello2fl"
+trailing_slash_test . "$pattern" "$expected"
+trailing_slash_test . "$pattern/" GLOB_NOMATCH
+
+# The pattern matches only 1 file.
+pattern='hello*fl'
+expected="hello2fl"
+trailing_slash_test "$tdir2" "$pattern" "$expected"
+trailing_slash_test "$tdir2" "$pattern/" GLOB_NOMATCH
+
+pattern="$tdir2/hello*fl"
+expected="$tdir2/hello2fl"
+trailing_slash_test . "$pattern" "$expected"
+trailing_slash_test . "$pattern/" GLOB_NOMATCH
+
+# The pattern matches only directores.
+pattern='hello?d*'
+expected="hello1d\nhello1ds\nhello2d"
+trailing_slash_test "$tdir2" "$pattern" "$expected"
+
+expected="hello1d/\nhello1ds/\nhello2d/"
+trailing_slash_test "$tdir2" "$pattern/" "$expected"
+
+pattern="$tdir2/hello?d*"
+expected="$tdir2/hello1d\n$tdir2/hello1ds\n$tdir2/hello2d"
+trailing_slash_test . "$pattern" "$expected"
+
+expected="$tdir2/hello1d/\n$tdir2/hello1ds/\n$tdir2/hello2d/"
+trailing_slash_test . "$pattern/" "$expected"
+
+# The pattern matches only 1 directory.
+pattern='hello*ds'
+expected="hello1ds"
+trailing_slash_test "$tdir2" "$pattern" "$expected"
+trailing_slash_test "$tdir2" "$pattern/" "$expected/"
+
+pattern="$tdir2/hello*ds"
+expected="$tdir2/hello1ds"
+trailing_slash_test . "$pattern" "$expected"
+trailing_slash_test . "$pattern/" "$expected/"
+
+
if test $result -eq 0; then
echo "All OK." > $logfile
fi

Loading...