Skip to content

unescape printable characters #3140

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Sep 15, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ghcide/src/Development/IDE/GHC/Compat/Outputable.hs
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ import qualified Outputable as Out
import SrcLoc
#endif
#if MIN_VERSION_ghc(9,3,0)
import GHC.Utils.Logger
import GHC.Driver.Config.Diagnostic
import Data.Maybe
import Data.Maybe
import GHC.Driver.Config.Diagnostic
import GHC.Utils.Logger
#endif

-- | A compatible function to print `Outputable` instances
Expand Down
11 changes: 8 additions & 3 deletions ghcide/src/Development/IDE/GHC/Util.hs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ import GHC.IO.Exception
import GHC.IO.Handle.Internals
import GHC.IO.Handle.Types
import GHC.Stack
import Ide.PluginUtils (unescape)
import System.Environment.Blank (getEnvDefault)
import System.FilePath
import System.IO.Unsafe
Expand Down Expand Up @@ -288,9 +289,13 @@ instance Outputable SDoc where

-- | Print a GHC value in `defaultUserStyle` without unique symbols.
--
-- This is the most common print utility, will print with a user-friendly style like: `a_a4ME` as `a`.
-- This is the most common print utility, and it will print with a user-friendly style like: `a_a4ME` as `a`.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe also include the fact that it unescapes characters? That's quite an important difference from other outputable functions. In fact... maybe we should use hlint to ban use of other outputable functions apart from this one?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe also include the fact that it unescapes characters?

Done.

maybe we should use hlint to ban use of other outputable functions apart from this one?

TBH I don't know if this is configurable. I think the scheme is to use printOutputable by default, and also allow other print functions to be used when needed.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean, we can use hlint to ban functions, so we could do it. I don't know if it's a good idea, though.

--
-- It internal using `showSDocUnsafe` with `unsafeGlobalDynFlags`.
-- It uses `showSDocUnsafe` with `unsafeGlobalDynFlags` internally.
printOutputable :: Outputable a => a -> T.Text
printOutputable = T.pack . printWithoutUniques
printOutputable =
-- IfaceTyLit from GHC.Iface.Type implements Outputable with 'show'.
-- Showing a String escapes non-ascii printable characters. We unescape it here.
-- More discussion at https://github.com/haskell/haskell-language-server/issues/3115.
unescape . T.pack . printWithoutUniques
{-# INLINE printOutputable #-}
2 changes: 2 additions & 0 deletions hls-plugin-api/hls-plugin-api.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ library
, text
, transformers
, unordered-containers
, megaparsec > 9

if os(windows)
build-depends: Win32
Expand Down Expand Up @@ -90,4 +91,5 @@ test-suite tests
, tasty
, tasty-hunit
, tasty-rerun
, text
, lsp-types
39 changes: 38 additions & 1 deletion hls-plugin-api/src/Ide/PluginUtils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ module Ide.PluginUtils
handleMaybe,
handleMaybeM,
throwPluginError,
unescape,
)
where

Expand All @@ -43,10 +44,11 @@ import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE)
import Data.Algorithm.Diff
import Data.Algorithm.DiffOutput
import Data.Bifunctor (Bifunctor (first))
import Data.Char (isPrint, showLitChar)
import qualified Data.HashMap.Strict as H
import Data.List (find)
import Data.String (IsString (fromString))
import qualified Data.Text as T
import Data.Void (Void)
import Ide.Plugin.Config
import Ide.Plugin.Properties
import Ide.Types
Expand All @@ -57,6 +59,9 @@ import Language.LSP.Types hiding
SemanticTokensEdit (_start))
import qualified Language.LSP.Types as J
import Language.LSP.Types.Capabilities
import qualified Text.Megaparsec as P
import qualified Text.Megaparsec.Char as P
import qualified Text.Megaparsec.Char.Lexer as P

-- ---------------------------------------------------------------------

Expand Down Expand Up @@ -255,3 +260,35 @@ pluginResponse :: Monad m => ExceptT String m a -> m (Either ResponseError a)
pluginResponse =
fmap (first (\msg -> ResponseError InternalError (fromString msg) Nothing))
. runExceptT

-- ---------------------------------------------------------------------

type TextParser = P.Parsec Void T.Text

-- | Unescape printable escape sequences within double quotes.
-- This is useful if you have to call 'show' indirectly, and it escapes some characters which you would prefer to
-- display as is.
unescape :: T.Text -> T.Text
unescape input =
case P.runParser escapedTextParser "inline" input of
Left _ -> input
Right strs -> T.pack strs

-- | Parser for a string that contains double quotes. Returns unescaped string.
escapedTextParser :: TextParser String
escapedTextParser = do
xs <- P.many (P.try stringLiteral)
x <- P.manyTill P.anySingle P.eof -- consume characters after the final double quote
pure $ concat xs ++ x
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels a little weird that you handle the content before the opening quote in stringLiteral but the content after the closing quote outside it. Maybe make that consistent?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I rewrote stringLiteral to not include any content outside of the double quotes. I think that's more reasonable.

where
stringLiteral :: TextParser String
stringLiteral = do
before <- P.manyTill P.anySingle (P.char '"') -- include any character before the first double quote
inside <- P.manyTill P.charLiteral (P.char '"')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about "hell\"ooo"? i.e. escaped quotes probably shouldn't stop us

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, from the tests it looks like you made this work but I'm not sure why it works...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P.charLiteral calls lookAhead internally. And this is an example from the haddock of charLiteral:

stringLiteral = char '"' >> manyTill L.charLiteral (char '"')

let f '"' = "\\\"" -- double quote should still be escaped
-- Despite the docs, 'showLitChar' and 'showLitString' from 'Data.Char' DOES ESCAPE unicode printable
-- characters. So we need to call 'isPrint' from 'Data.Char' manually.
f ch = if isPrint ch then [ch] else showLitChar ch ""
inside' = concatMap f inside

pure $ before <> "\"" <> inside' <> "\""
26 changes: 24 additions & 2 deletions hls-plugin-api/test/Ide/PluginUtilsTest.hs
Original file line number Diff line number Diff line change
@@ -1,13 +1,35 @@
{-# LANGUAGE OverloadedStrings #-}

module Ide.PluginUtilsTest
( tests
) where

import Ide.PluginUtils (positionInRange)
import Data.Char (isPrint)
import qualified Data.Text as T
import Ide.PluginUtils (positionInRange, unescape)
import Language.LSP.Types (Position (Position), Range (Range))
import Test.Tasty
import Test.Tasty.HUnit

tests :: TestTree
tests = testGroup "PluginUtils"
[
[ unescapeTest
]

unescapeTest :: TestTree
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💯

unescapeTest = testGroup "unescape"
[ testCase "no double quote" $
unescape "hello世界" @?= "hello世界"
, testCase "whole string quoted" $
unescape "\"hello\\19990\\30028\"" @?= "\"hello世界\""
, testCase "text before quotes should not be unescaped" $
unescape "\\19990a\"hello\\30028\"" @?= "\\19990a\"hello界\""
, testCase "some text after quotes" $
unescape "\"hello\\19990\\30028\"abc" @?= "\"hello世界\"abc"
, testCase "many pairs of quote" $
unescape "oo\"hello\\19990\\30028\"abc\"\1087\1088\1080\1074\1077\1090\"hh" @?= "oo\"hello世界\"abc\"привет\"hh"
, testCase "double quote itself should not be unescaped" $
unescape "\"\\\"o\"" @?= "\"\\\"o\""
, testCase "control characters should not be escaped" $
unescape "\"\\n\\t\"" @?= "\"\\n\\t\""
]