diff --git a/.github/workflows/haskell-ci.yml b/.github/workflows/haskell-ci.yml new file mode 100644 index 0000000..5438079 --- /dev/null +++ b/.github/workflows/haskell-ci.yml @@ -0,0 +1,269 @@ +# This GitHub workflow config has been generated by a script via +# +# haskell-ci 'github' 'cabal.project' +# +# To regenerate the script (for example after adjusting tested-with) run +# +# haskell-ci regenerate +# +# For more information, see https://github.com/haskell-CI/haskell-ci +# +# version: 0.15.20230321 +# +# REGENDATA ("0.15.20230321",["github","cabal.project"]) +# +name: Haskell-CI +on: + push: + branches: + - master + - ci-* + pull_request: + branches: + - master + - ci-* +jobs: + linux: + name: Haskell-CI - Linux - ${{ matrix.compiler }} + runs-on: ubuntu-20.04 + timeout-minutes: + 60 + container: + image: buildpack-deps:bionic + continue-on-error: ${{ matrix.allow-failure }} + strategy: + matrix: + include: + - compiler: ghc-9.6.1 + compilerKind: ghc + compilerVersion: 9.6.1 + setup-method: ghcup + allow-failure: false + - compiler: ghc-9.4.4 + compilerKind: ghc + compilerVersion: 9.4.4 + setup-method: ghcup + allow-failure: false + - compiler: ghc-9.2.7 + compilerKind: ghc + compilerVersion: 9.2.7 + setup-method: ghcup + allow-failure: false + - compiler: ghc-9.0.2 + compilerKind: ghc + compilerVersion: 9.0.2 + setup-method: ghcup + allow-failure: false + - compiler: ghc-8.10.7 + compilerKind: ghc + compilerVersion: 8.10.7 + setup-method: ghcup + allow-failure: false + - compiler: ghc-8.8.4 + compilerKind: ghc + compilerVersion: 8.8.4 + setup-method: hvr-ppa + allow-failure: false + - compiler: ghc-8.6.5 + compilerKind: ghc + compilerVersion: 8.6.5 + setup-method: hvr-ppa + allow-failure: false + - compiler: ghc-8.4.4 + compilerKind: ghc + compilerVersion: 8.4.4 + setup-method: hvr-ppa + allow-failure: false + - compiler: ghc-8.2.2 + compilerKind: ghc + compilerVersion: 8.2.2 + setup-method: hvr-ppa + allow-failure: false + - compiler: ghc-8.0.2 + 
compilerKind: ghc + compilerVersion: 8.0.2 + setup-method: hvr-ppa + allow-failure: false + - compiler: ghc-7.10.3 + compilerKind: ghc + compilerVersion: 7.10.3 + setup-method: hvr-ppa + allow-failure: false + fail-fast: false + steps: + - name: apt + run: | + apt-get update + apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5 + if [ "${{ matrix.setup-method }}" = ghcup ]; then + mkdir -p "$HOME/.ghcup/bin" + curl -sL https://downloads.haskell.org/ghcup/0.1.19.2/x86_64-linux-ghcup-0.1.19.2 > "$HOME/.ghcup/bin/ghcup" + chmod a+x "$HOME/.ghcup/bin/ghcup" + "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) + "$HOME/.ghcup/bin/ghcup" install cabal 3.10.1.0 || (cat "$HOME"/.ghcup/logs/*.* && false) + else + apt-add-repository -y 'ppa:hvr/ghc' + apt-get update + apt-get install -y "$HCNAME" + mkdir -p "$HOME/.ghcup/bin" + curl -sL https://downloads.haskell.org/ghcup/0.1.19.2/x86_64-linux-ghcup-0.1.19.2 > "$HOME/.ghcup/bin/ghcup" + chmod a+x "$HOME/.ghcup/bin/ghcup" + "$HOME/.ghcup/bin/ghcup" install cabal 3.10.1.0 || (cat "$HOME"/.ghcup/logs/*.* && false) + fi + env: + HCKIND: ${{ matrix.compilerKind }} + HCNAME: ${{ matrix.compiler }} + HCVER: ${{ matrix.compilerVersion }} + - name: Set PATH and environment variables + run: | + echo "$HOME/.cabal/bin" >> $GITHUB_PATH + echo "LANG=C.UTF-8" >> "$GITHUB_ENV" + echo "CABAL_DIR=$HOME/.cabal" >> "$GITHUB_ENV" + echo "CABAL_CONFIG=$HOME/.cabal/config" >> "$GITHUB_ENV" + HCDIR=/opt/$HCKIND/$HCVER + if [ "${{ matrix.setup-method }}" = ghcup ]; then + HC=$HOME/.ghcup/bin/$HCKIND-$HCVER + echo "HC=$HC" >> "$GITHUB_ENV" + echo "HCPKG=$HOME/.ghcup/bin/$HCKIND-pkg-$HCVER" >> "$GITHUB_ENV" + echo "HADDOCK=$HOME/.ghcup/bin/haddock-$HCVER" >> "$GITHUB_ENV" + echo "CABAL=$HOME/.ghcup/bin/cabal-3.10.1.0 -vnormal+nowrap" >> "$GITHUB_ENV" + else + HC=$HCDIR/bin/$HCKIND + echo "HC=$HC" >> "$GITHUB_ENV" + echo 
"HCPKG=$HCDIR/bin/$HCKIND-pkg" >> "$GITHUB_ENV" + echo "HADDOCK=$HCDIR/bin/haddock" >> "$GITHUB_ENV" + echo "CABAL=$HOME/.ghcup/bin/cabal-3.10.1.0 -vnormal+nowrap" >> "$GITHUB_ENV" + fi + + HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))') + echo "HCNUMVER=$HCNUMVER" >> "$GITHUB_ENV" + echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV" + echo "ARG_BENCH=--enable-benchmarks" >> "$GITHUB_ENV" + echo "HEADHACKAGE=false" >> "$GITHUB_ENV" + echo "ARG_COMPILER=--$HCKIND --with-compiler=$HC" >> "$GITHUB_ENV" + echo "GHCJSARITH=0" >> "$GITHUB_ENV" + env: + HCKIND: ${{ matrix.compilerKind }} + HCNAME: ${{ matrix.compiler }} + HCVER: ${{ matrix.compilerVersion }} + - name: env + run: | + env + - name: write cabal config + run: | + mkdir -p $CABAL_DIR + cat >> $CABAL_CONFIG <> $CABAL_CONFIG < cabal-plan.xz + echo 'de73600b1836d3f55e32d80385acc055fd97f60eaa0ab68a755302685f5d81bc cabal-plan.xz' | sha256sum -c - + xz -d < cabal-plan.xz > $HOME/.cabal/bin/cabal-plan + rm -f cabal-plan.xz + chmod a+x $HOME/.cabal/bin/cabal-plan + cabal-plan --version + - name: checkout + uses: actions/checkout@v3 + with: + path: source + - name: initial cabal.project for sdist + run: | + touch cabal.project + echo "packages: $GITHUB_WORKSPACE/source/language-python" >> cabal.project + echo "packages: $GITHUB_WORKSPACE/source/language-python-test" >> cabal.project + cat cabal.project + - name: sdist + run: | + mkdir -p sdist + $CABAL sdist all --output-dir $GITHUB_WORKSPACE/sdist + - name: unpack + run: | + mkdir -p unpacked + find sdist -maxdepth 1 -type f -name '*.tar.gz' -exec tar -C $GITHUB_WORKSPACE/unpacked -xzvf {} \; + - name: generate cabal.project + run: | + PKGDIR_language_python="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/language-python-[0-9.]*')" + echo "PKGDIR_language_python=${PKGDIR_language_python}" >> "$GITHUB_ENV" + PKGDIR_language_python_test="$(find 
"$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/language-python-test-[0-9.]*')" + echo "PKGDIR_language_python_test=${PKGDIR_language_python_test}" >> "$GITHUB_ENV" + rm -f cabal.project cabal.project.local + touch cabal.project + touch cabal.project.local + echo "packages: ${PKGDIR_language_python}" >> cabal.project + echo "packages: ${PKGDIR_language_python_test}" >> cabal.project + if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo "package language-python" >> cabal.project ; fi + if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo " ghc-options: -Werror=missing-methods" >> cabal.project ; fi + if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo "package language-python-test" >> cabal.project ; fi + if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo " ghc-options: -Werror=missing-methods" >> cabal.project ; fi + cat >> cabal.project <> cabal.project.local + cat cabal.project + cat cabal.project.local + - name: dump install plan + run: | + $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dry-run all + cabal-plan + - name: restore cache + uses: actions/cache/restore@v3 + with: + key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} + path: ~/.cabal/store + restore-keys: ${{ runner.os }}-${{ matrix.compiler }}- + - name: install dependencies + run: | + $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all + $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all + - name: build w/o tests + run: | + $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all + - name: build + run: | + $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always + - name: cabal check + run: | + cd ${PKGDIR_language_python} || false + ${CABAL} -vnormal check + cd ${PKGDIR_language_python_test} || false + ${CABAL} -vnormal check + - name: haddock + run: | + $CABAL v2-haddock --disable-documentation --haddock-all $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS 
$ARG_BENCH all + - name: unconstrained build + run: | + rm -f cabal.project.local + $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all + - name: save cache + uses: actions/cache/save@v3 + if: always() + with: + key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} + path: ~/.cabal/store diff --git a/.gitignore b/.gitignore index 39d22d9..552ce1b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ cabal.sandbox.config .idea language-python.iml .stack-work +dist-newstyle diff --git a/README.md b/README.md index 439fd0c..c67046f 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,11 @@ Introduction A lexer, parser and pretty printing library for Python 2 and 3. +Intended use case +----------------- + +Generally speaking this library was written with the intention of parsing Python for the purposes of program transformation and compilation. It can also be used for Python code generation, but that is not the primary goal. As a consequence the "Abstract Syntax Tree" is not particularly abstract, and might be better described as a "Concrete Syntax Tree". + License ------- @@ -11,23 +16,33 @@ language-python is released as open source software under the terms of the 3 cla Installation ------------ -language-python can be installed with cabal: +language-python can be installed with cabal (cabal version 3.0.0.0 onwards): - cabal install language-python +``` +cabal build all +``` -or, if you prefer, in a sandbox (recommended): +Testing +------- - cabal sandbox init - cabal update - cabal install happy - cabal install alex - cabal install --dry-run - cabal install +Test cases are provided in a language-python-test. Test binaries can be built and run as below (cabal version 3.0.0.0 onwards): -use `-jN` for `N` threads of parallel building in the `cabal install` step if you have a multi-core machine. 
+``` +cabal install shelltestrunner +cabal install --overwrite-policy=always language-python-test +cd language-python-test/ +PATH=$HOME/.cabal/bin/:$PATH make test +``` -Testing -------- +If all goes well you should see a long list of test cases and a summary at the end, that should look like the following: +``` + Test Cases Total + Passed 519 519 + Failed 0 0 + Total 519 519 +``` + +Pull requests +------------- -Test cases are provided in a separate package -[language-python-test](https://github.com/bjpop/language-python-test). +Pull requests are greatly appreciated. If you plan to submit a pull request, please test your code in the test suite first. New test cases are desirable, especially if you are fixing a bug or adding a new feature. diff --git a/cabal.haskell-ci b/cabal.haskell-ci new file mode 100644 index 0000000..1dc307a --- /dev/null +++ b/cabal.haskell-ci @@ -0,0 +1,4 @@ +branches: master ci-* + +-- monads-tf does not like transformers-0.6 +installed: +all -transformers -mtl \ No newline at end of file diff --git a/cabal.project b/cabal.project new file mode 100644 index 0000000..18c256c --- /dev/null +++ b/cabal.project @@ -0,0 +1,2 @@ +packages: language-python/ + language-python-test/ diff --git a/language-python-test/.gitignore b/language-python-test/.gitignore new file mode 100644 index 0000000..a7e7fcf --- /dev/null +++ b/language-python-test/.gitignore @@ -0,0 +1,7 @@ +.cabal-sandbox +cabal.sandbox.config +dist +.idea +language-python-test.iml +.stack-work +out diff --git a/LICENSE b/language-python-test/LICENSE similarity index 100% rename from LICENSE rename to language-python-test/LICENSE diff --git a/language-python-test/Makefile b/language-python-test/Makefile new file mode 100644 index 0000000..2009a6b --- /dev/null +++ b/language-python-test/Makefile @@ -0,0 +1,21 @@ +# Copyright : (c) 2014 Bernie Pope +# License : BSD-style +# Maintainer : florbitous@gmail.com + +# A convenience Makefile. 
+ +.PHONY: test +test: + shelltest --color --execdir test/ -- -j1 + +.PHONY: test_features +test_features: + shelltest --color --execdir test/features -- -j1 + +.PHONY: test_cpython2 +test_cpython2: + shelltest --color --execdir test/CPython_test_suite_v2 -- -j1 + +.PHONY: test_cpython3 +test_cpython3: + shelltest --color --execdir test/CPython_test_suite_v3 -- -j1 diff --git a/language-python-test/README.md b/language-python-test/README.md new file mode 100644 index 0000000..8ca9dd6 --- /dev/null +++ b/language-python-test/README.md @@ -0,0 +1,71 @@ +Introduction +============ + +A test suite for the Haskell library [language-python](https://github.com/bjpop/language-python) (a parser for Python 2 and 3). + +License +------- + +language-python-test is released as open source software under the terms of the 3 clause BSD License. See the file LICENCE.txt in the [source code repository of language-python-test](https://github.com/bjpop/language-python-test). + +This package also contains files from the CPython test suite. Those files are found in the +sub-directories test/CPython_test_suite_v2 and test/CPython_test_suite_v3. The license for those files is +contained in those directories. + +Installation +------------ + +Using cabal version 3.0.0.0 or greater: + +``` +cabal build +cabal install +``` + +Usage +----- + +The package builds the following executable programs: + +* language-python-parse-pretty +* language-python-roundtrip +* language-python-tokens + +The first program parses a Python file as input and pretty prints it back again. + +The second program performs a round-trip of parse, pretty print, parse and pretty print. It checks that the first +pretty print is equal to the second pretty print. This is not a perfect test for correctness, but it does check that the parser and pretty printer agree to some extent, and is usually quite good at finding errors. 
+The third program performs lexical analysis on the input Python file and pretty prints the resulting token stream. + +The test suite (which tests the behaviour of language-python) uses the shelltest tool. To run the tests you need to have shelltest installed: + +``` +cabal install shelltestrunner +``` + +The tests are found in the sub-directory called test. + +You can run the tests like so from the top directory of the language-python-test package: + +``` +shelltest --color --execdir test/ -- -j1 +``` + +If you have installed into a cabal sandbox, then you might need to adjust your path: + +``` +PATH=$HOME/.cabal/bin/:$PATH shelltest --color --execdir test/ -- -j1 +``` + +We provide a Makefile for convenience which does the same thing. You can run it like so: + +``` +make test +``` + +or, with PATH adjustment: + +``` +PATH=$HOME/.cabal/bin/:$PATH make test +``` diff --git a/Setup.lhs b/language-python-test/Setup.lhs similarity index 100% rename from Setup.lhs rename to language-python-test/Setup.lhs diff --git a/language-python-test/docs/notes.txt b/language-python-test/docs/notes.txt new file mode 100644 index 0000000..bc5a130 --- /dev/null +++ b/language-python-test/docs/notes.txt @@ -0,0 +1,18 @@ +Notes about the design of language-python-test +-------------------------------------------------------------------------------- + +1. Why is this a separate package, and not included in the language-python + source tree? + +Mainly because of a limitation in the current Haskell cabal tool. It appears +to be difficult to have a single cabal package which defines a library and an +executable which depends on the library. While it seems to be possible, it is +inconvenient because you seem to have to specify all dependencies twice. + +2. 
For roundtrip testing we will have three kinds of tests: + - those specific to version 2 of python + - those specific to version 3 of python + - those which are independent of the python version (should be tested by both) + + + diff --git a/language-python-test/language-python-test.cabal b/language-python-test/language-python-test.cabal new file mode 100644 index 0000000..10d9c59 --- /dev/null +++ b/language-python-test/language-python-test.cabal @@ -0,0 +1,52 @@ +name: language-python-test +version: 0.6.0 +cabal-version: >= 1.8 +synopsis: testing code for the language-python library +description: A few executables to test the language-python library. +category: Language +license: BSD3 +license-file: LICENSE +copyright: (c) 2014-2019 Bernard James Pope +author: Bernard James Pope (Bernie Pope) +maintainer: florbitous@gmail.com +homepage: http://github.com/bjpop/language-python-test +build-type: Simple +stability: experimental + +tested-with: + GHC == 9.6.1 + GHC == 9.4.4 + GHC == 9.2.7 + GHC == 9.0.2 + GHC == 8.10.7 + GHC == 8.8.4 + GHC == 8.6.5 + GHC == 8.4.4 + GHC == 8.2.2 + GHC == 8.0.2 + GHC == 7.10.3 + +source-repository head + type: git + location: https://github.com/bjpop/language-python-test + +Executable language-python-roundtrip + hs-source-dirs: + ./src + main-is: RoundTrip.hs + other-modules: + build-depends: base == 4.*, language-python == 0.6.0 + +Executable language-python-tokens + hs-source-dirs: + ./src + main-is: Tokens.hs + other-modules: + build-depends: base == 4.*, language-python == 0.6.0 + +Executable language-python-parse-pretty + hs-source-dirs: + ./src + main-is: ParsePretty.hs + other-modules: + build-depends: base == 4.*, language-python == 0.6.0 diff --git a/language-python-test/src/ParsePretty.hs b/language-python-test/src/ParsePretty.hs new file mode 100644 index 0000000..4cdc278 --- /dev/null +++ b/language-python-test/src/ParsePretty.hs @@ -0,0 +1,42 @@ +import Language.Python.Common +import Language.Python.Version2 as V2 +import 
Language.Python.Version3 as V3 +import System.Exit +import System.Environment + +data PythonVersion = Two | Three + deriving (Eq, Show) + +type Parser = String -> String -> Either ParseError (ModuleSpan, [Token]) + +main :: IO () +main = do + args <- getArgs + case args of + (versionStr:inFile:_rest) -> + case parseVersion versionStr of + Nothing -> do + putStrLn $ "Unknown Python version: " ++ versionStr + exitFailure + Just version -> do + contents <- readFile inFile + let parser = pickParser version + case parseAndPretty parser inFile contents of + Left err -> putStrLn $ prettyText err + Right ast -> putStrLn $ prettyText ast + _other -> putStrLn "Incorrect command line. Expected: <2|3> inputFileName" + +pickParser :: PythonVersion -> Parser +pickParser Two = V2.parseModule +pickParser Three = V3.parseModule + +parseAndPretty :: Parser -> FilePath -> String -> Either ParseError ModuleSpan +parseAndPretty parser fileName contents = + case parser contents fileName of + Left e -> Left e + Right (ast, _comments) -> Right ast + +parseVersion :: String -> Maybe PythonVersion +parseVersion "2" = Just Two +parseVersion "3" = Just Three +parseVersion _other = Nothing diff --git a/language-python-test/src/RoundTrip.hs b/language-python-test/src/RoundTrip.hs new file mode 100644 index 0000000..bbc8546 --- /dev/null +++ b/language-python-test/src/RoundTrip.hs @@ -0,0 +1,80 @@ +import Language.Python.Common +import Language.Python.Version2 as V2 +import Language.Python.Version3 as V3 +import System.Exit +import System.Environment + +data PythonVersion = Two | Three | Both + deriving (Eq, Show) + +data Comparison = ParseFailed String | Equal | NotEqual String String +type Parser = String -> String -> Either ParseError (ModuleSpan, [Token]) + +main :: IO () +main = do + args <- getArgs + case args of + (versionStr:inFile:_rest) -> + case parseVersion versionStr of + Nothing -> do + putStrLn $ "Unknown Python version: " ++ versionStr + exitFailure + Just version -> do + 
contents <- readFile inFile + let parsers = pickParsers version + comparisons = [parseAndCompare p inFile contents | p <- parsers] + test <- check comparisons + if test then exitWith ExitSuccess else exitFailure + _other -> putStrLn "Incorrect command line. Expected: <2|3|n> inputFileName" + +check :: [Comparison] -> IO Bool +check [] = return True -- must have all been equal +check (Equal:rest) = check rest +check (NotEqual s1 s2:_rest) = do + doubleLine + putStrLn "Round trip parse failed" + doubleLine + putStrLn "pretty1" + line + putStrLn s1 + doubleLine + putStrLn "pretty2" + line + putStrLn s2 + return False +check (ParseFailed e:_rest) = do + putStrLn "Parse failed with error: " + putStrLn e + return False + +pickParsers :: PythonVersion -> [Parser] +pickParsers Two = [V2.parseModule] +pickParsers Three = [V3.parseModule] +pickParsers Both = [V2.parseModule, V3.parseModule] + +parseAndCompare :: Parser -> FilePath -> String -> Comparison +parseAndCompare parser inFile contents = + case parseAndPretty parser inFile contents of + Left e -> ParseFailed $ prettyText e + Right pretty1 -> + case parseAndPretty parser "" pretty1 of + Left e -> ParseFailed $ prettyText e + Right pretty2 + | pretty1 == pretty2 -> Equal + | otherwise -> NotEqual pretty1 pretty2 + +line, doubleLine :: IO () +line = putStrLn $ replicate 80 '-' +doubleLine = putStrLn $ replicate 80 '=' + +parseAndPretty :: Parser -> FilePath -> String -> Either ParseError String +parseAndPretty parser fileName contents = + case parser contents fileName of + Left e -> Left e + Right (ast, _comments) -> Right (prettyText ast ++ "\n") + +parseVersion :: String -> Maybe PythonVersion +parseVersion "2" = Just Two +parseVersion "3" = Just Three +parseVersion "n" = Just Both +parseVersion _other = Nothing diff --git a/language-python-test/src/Tokens.hs b/language-python-test/src/Tokens.hs new file mode 100644 index 0000000..b05eac6 --- /dev/null +++ b/language-python-test/src/Tokens.hs @@ -0,0 +1,39 @@ +import 
Language.Python.Common +import Language.Python.Version2 as V2 +import Language.Python.Version3 as V3 +import System.Exit +import System.Environment + +data PythonVersion = Two | Three + deriving (Eq, Show) + +type Lexer = String -> String -> Either ParseError [Token] + +main :: IO () +main = do + args <- getArgs + case args of + (versionStr:inFile:_rest) -> + case parseVersion versionStr of + Nothing -> do + putStrLn $ "Unknown Python version: " ++ versionStr + exitFailure + Just version -> do + contents <- readFile inFile + runLexer inFile (pickLexer version) contents + _other -> putStrLn "Incorrect command line. Expected: <2|3> inputFileName" + +pickLexer :: PythonVersion -> Lexer +pickLexer Two = V2.lex +pickLexer Three = V3.lex + +parseVersion :: String -> Maybe PythonVersion +parseVersion "2" = Just Two +parseVersion "3" = Just Three +parseVersion _other = Nothing + +runLexer :: FilePath -> Lexer -> String -> IO () +runLexer inFile lex contents = do + case lex contents inFile of + Left e -> print e + Right toks -> putStr $ unlines $ map debugTokenString toks diff --git a/language-python-test/test/CPython_test_suite_v2/BaseHTTPServer.py b/language-python-test/test/CPython_test_suite_v2/BaseHTTPServer.py new file mode 100644 index 0000000..acd8394 --- /dev/null +++ b/language-python-test/test/CPython_test_suite_v2/BaseHTTPServer.py @@ -0,0 +1,592 @@ +"""HTTP server base class. + +Note: the class in this module doesn't implement any HTTP request; see +SimpleHTTPServer for simple implementations of GET, HEAD and POST +(including CGI scripts). It does, however, optionally implement HTTP/1.1 +persistent connections, as of version 0.3. + +Contents: + +- BaseHTTPRequestHandler: HTTP request handler base class +- test: test function + +XXX To do: + +- log requests even later (to capture byte count) +- log user-agent header and other interesting goodies +- send error log to separate file +""" + + +# See also: +# +# HTTP Working Group T. Berners-Lee +# INTERNET-DRAFT R. 
T. Fielding +# H. Frystyk Nielsen +# Expires September 8, 1995 March 8, 1995 +# +# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt +# +# and +# +# Network Working Group R. Fielding +# Request for Comments: 2616 et al +# Obsoletes: 2068 June 1999 +# Category: Standards Track +# +# URL: http://www.faqs.org/rfcs/rfc2616.html + +# Log files +# --------- +# +# Here's a quote from the NCSA httpd docs about log file format. +# +# | The logfile format is as follows. Each line consists of: +# | +# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb +# | +# | host: Either the DNS name or the IP number of the remote client +# | rfc931: Any information returned by identd for this person, +# | - otherwise. +# | authuser: If user sent a userid for authentication, the user name, +# | - otherwise. +# | DD: Day +# | Mon: Month (calendar name) +# | YYYY: Year +# | hh: hour (24-hour format, the machine's timezone) +# | mm: minutes +# | ss: seconds +# | request: The first line of the HTTP request as sent by the client. +# | ddd: the status code returned by the server, - if not available. +# | bbbb: the total number of bytes sent, +# | *not including the HTTP/1.0 header*, - if not available +# | +# | You can determine the name of the file accessed through request. +# +# (Actually, the latter is only true if you know the server configuration +# at the time the request was made!) + +__version__ = "0.3" + +__all__ = ["HTTPServer", "BaseHTTPRequestHandler"] + +import sys +import time +import socket # For gethostbyaddr() +from warnings import filterwarnings, catch_warnings +with catch_warnings(): + if sys.py3kwarning: + filterwarnings("ignore", ".*mimetools has been removed", + DeprecationWarning) + import mimetools +import SocketServer + +# Default error message template +DEFAULT_ERROR_MESSAGE = """\ + +Error response + + +

Error response

+

Error code %(code)d. +

Message: %(message)s. +

Error code explanation: %(code)s = %(explain)s. + +""" + +DEFAULT_ERROR_CONTENT_TYPE = "text/html" + +def _quote_html(html): + return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;") + +class HTTPServer(SocketServer.TCPServer): + + allow_reuse_address = 1 # Seems to make sense in testing environment + + def server_bind(self): + """Override server_bind to store the server name.""" + SocketServer.TCPServer.server_bind(self) + host, port = self.socket.getsockname()[:2] + self.server_name = socket.getfqdn(host) + self.server_port = port + + +class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler): + + """HTTP request handler base class. + + The following explanation of HTTP serves to guide you through the + code as well as to expose any misunderstandings I may have about + HTTP (so you don't need to read the code to figure out I'm wrong + :-). + + HTTP (HyperText Transfer Protocol) is an extensible protocol on + top of a reliable stream transport (e.g. TCP/IP). The protocol + recognizes three parts to a request: + + 1. One line identifying the request type and path + 2. An optional set of RFC-822-style headers + 3. An optional data part + + The headers and data are separated by a blank line. + + The first line of the request has the form + + <command> <path> <version> + + where <command> is a (case-sensitive) keyword such as GET or POST, + <path> is a string containing path information for the request, + and <version> should be the string "HTTP/1.0" or "HTTP/1.1". + <path> is encoded using the URL encoding scheme (using %xx to signify + the ASCII character with hex code xx). + + The specification specifies that lines are separated by CRLF but + for compatibility with the widest range of clients recommends + servers also handle LF. Similarly, whitespace in the request line + is treated sensibly (allowing multiple spaces between components + and allowing trailing whitespace). + + Similarly, for output, lines ought to be separated by CRLF pairs + but most clients grok LF characters just fine. 
+ + If the first line of the request has the form + + + + (i.e. is left out) then this is assumed to be an HTTP + 0.9 request; this form has no optional headers and data part and + the reply consists of just the data. + + The reply form of the HTTP 1.x protocol again has three parts: + + 1. One line giving the response code + 2. An optional set of RFC-822-style headers + 3. The data + + Again, the headers and data are separated by a blank line. + + The response code line has the form + + + + where is the protocol version ("HTTP/1.0" or "HTTP/1.1"), + is a 3-digit response code indicating success or + failure of the request, and is an optional + human-readable string explaining what the response code means. + + This server parses the request and the headers, and then calls a + function specific to the request type (). Specifically, + a request SPAM will be handled by a method do_SPAM(). If no + such method exists the server sends an error response to the + client. If it exists, it is called with no arguments: + + do_SPAM() + + Note that the request name is case sensitive (i.e. SPAM and spam + are different requests). + + The various request details are stored in instance variables: + + - client_address is the client IP address in the form (host, + port); + + - command, path and version are the broken-down request line; + + - headers is an instance of mimetools.Message (or a derived + class) containing the header information; + + - rfile is a file object open for reading positioned at the + start of the optional input data part; + + - wfile is a file object open for writing. + + IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! + + The first thing to be written must be the response line. Then + follow 0 or more header lines, then a blank line, and then the + actual data (if any). 
The meaning of the header lines depends on + the command executed by the server; in most cases, when data is + returned, there should be at least one header line of the form + + Content-type: / + + where and should be registered MIME types, + e.g. "text/html" or "text/plain". + + """ + + # The Python system version, truncated to its first component. + sys_version = "Python/" + sys.version.split()[0] + + # The server software version. You may want to override this. + # The format is multiple whitespace-separated strings, + # where each string is of the form name[/version]. + server_version = "BaseHTTP/" + __version__ + + # The default request version. This only affects responses up until + # the point where the request line is parsed, so it mainly decides what + # the client gets back when sending a malformed request line. + # Most web servers default to HTTP 0.9, i.e. don't send a status line. + default_request_version = "HTTP/0.9" + + def parse_request(self): + """Parse a request (internal). + + The request should be stored in self.raw_requestline; the results + are in self.command, self.path, self.request_version and + self.headers. + + Return True for success, False for failure; on failure, an + error is sent back. + + """ + self.command = None # set in case of error on the first line + self.request_version = version = self.default_request_version + self.close_connection = 1 + requestline = self.raw_requestline + if requestline[-2:] == '\r\n': + requestline = requestline[:-2] + elif requestline[-1:] == '\n': + requestline = requestline[:-1] + self.requestline = requestline + words = requestline.split() + if len(words) == 3: + [command, path, version] = words + if version[:5] != 'HTTP/': + self.send_error(400, "Bad request version (%r)" % version) + return False + try: + base_version_number = version.split('/', 1)[1] + version_number = base_version_number.split(".") + # RFC 2145 section 3.1 says there can be only one "." 
and + # - major and minor numbers MUST be treated as + # separate integers; + # - HTTP/2.4 is a lower version than HTTP/2.13, which in + # turn is lower than HTTP/12.3; + # - Leading zeros MUST be ignored by recipients. + if len(version_number) != 2: + raise ValueError + version_number = int(version_number[0]), int(version_number[1]) + except (ValueError, IndexError): + self.send_error(400, "Bad request version (%r)" % version) + return False + if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": + self.close_connection = 0 + if version_number >= (2, 0): + self.send_error(505, + "Invalid HTTP Version (%s)" % base_version_number) + return False + elif len(words) == 2: + [command, path] = words + self.close_connection = 1 + if command != 'GET': + self.send_error(400, + "Bad HTTP/0.9 request type (%r)" % command) + return False + elif not words: + return False + else: + self.send_error(400, "Bad request syntax (%r)" % requestline) + return False + self.command, self.path, self.request_version = command, path, version + + # Examine the headers and look for a Connection directive + self.headers = self.MessageClass(self.rfile, 0) + + conntype = self.headers.get('Connection', "") + if conntype.lower() == 'close': + self.close_connection = 1 + elif (conntype.lower() == 'keep-alive' and + self.protocol_version >= "HTTP/1.1"): + self.close_connection = 0 + return True + + def handle_one_request(self): + """Handle a single HTTP request. + + You normally don't need to override this method; see the class + __doc__ string for information on how to handle specific HTTP + commands such as GET and POST. 
+ + """ + self.raw_requestline = self.rfile.readline() + if not self.raw_requestline: + self.close_connection = 1 + return + if not self.parse_request(): # An error code has been sent, just exit + return + mname = 'do_' + self.command + if not hasattr(self, mname): + self.send_error(501, "Unsupported method (%r)" % self.command) + return + method = getattr(self, mname) + method() + + def handle(self): + """Handle multiple requests if necessary.""" + self.close_connection = 1 + + self.handle_one_request() + while not self.close_connection: + self.handle_one_request() + + def send_error(self, code, message=None): + """Send and log an error reply. + + Arguments are the error code, and a detailed message. + The detailed message defaults to the short entry matching the + response code. + + This sends an error response (so it must be called before any + output has been generated), logs the error, and finally sends + a piece of HTML explaining the error to the user. + + """ + + try: + short, long = self.responses[code] + except KeyError: + short, long = '???', '???' + if message is None: + message = short + explain = long + self.log_error("code %d, message %s", code, message) + # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201) + content = (self.error_message_format % + {'code': code, 'message': _quote_html(message), 'explain': explain}) + self.send_response(code, message) + self.send_header("Content-Type", self.error_content_type) + self.send_header('Connection', 'close') + self.end_headers() + if self.command != 'HEAD' and code >= 200 and code not in (204, 304): + self.wfile.write(content) + + error_message_format = DEFAULT_ERROR_MESSAGE + error_content_type = DEFAULT_ERROR_CONTENT_TYPE + + def send_response(self, code, message=None): + """Send the response header and log the response code. + + Also send two standard headers with the server software + version and the current date. 
+ + """ + self.log_request(code) + if message is None: + if code in self.responses: + message = self.responses[code][0] + else: + message = '' + if self.request_version != 'HTTP/0.9': + self.wfile.write("%s %d %s\r\n" % + (self.protocol_version, code, message)) + # print (self.protocol_version, code, message) + self.send_header('Server', self.version_string()) + self.send_header('Date', self.date_time_string()) + + def send_header(self, keyword, value): + """Send a MIME header.""" + if self.request_version != 'HTTP/0.9': + self.wfile.write("%s: %s\r\n" % (keyword, value)) + + if keyword.lower() == 'connection': + if value.lower() == 'close': + self.close_connection = 1 + elif value.lower() == 'keep-alive': + self.close_connection = 0 + + def end_headers(self): + """Send the blank line ending the MIME headers.""" + if self.request_version != 'HTTP/0.9': + self.wfile.write("\r\n") + + def log_request(self, code='-', size='-'): + """Log an accepted request. + + This is called by send_response(). + + """ + + self.log_message('"%s" %s %s', + self.requestline, str(code), str(size)) + + def log_error(self, format, *args): + """Log an error. + + This is called when a request cannot be fulfilled. By + default it passes the message on to log_message(). + + Arguments are the same as for log_message(). + + XXX This should go to the separate error log. + + """ + + self.log_message(format, *args) + + def log_message(self, format, *args): + """Log an arbitrary message. + + This is used by all other logging functions. Override + it if you have specific logging wishes. + + The first argument, FORMAT, is a format string for the + message to be logged. If the format string contains + any % escapes requiring parameters, they should be + specified as subsequent arguments (it's just like + printf!). + + The client host and current date/time are prefixed to + every message. 
+ + """ + + sys.stderr.write("%s - - [%s] %s\n" % + (self.address_string(), + self.log_date_time_string(), + format%args)) + + def version_string(self): + """Return the server software version string.""" + return self.server_version + ' ' + self.sys_version + + def date_time_string(self, timestamp=None): + """Return the current date and time formatted for a message header.""" + if timestamp is None: + timestamp = time.time() + year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp) + s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( + self.weekdayname[wd], + day, self.monthname[month], year, + hh, mm, ss) + return s + + def log_date_time_string(self): + """Return the current time formatted for logging.""" + now = time.time() + year, month, day, hh, mm, ss, x, y, z = time.localtime(now) + s = "%02d/%3s/%04d %02d:%02d:%02d" % ( + day, self.monthname[month], year, hh, mm, ss) + return s + + weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + + monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + + def address_string(self): + """Return the client address formatted for logging. + + This version looks up the full hostname using gethostbyaddr(), + and tries to find a name that contains at least one dot. + + """ + + host, port = self.client_address[:2] + return socket.getfqdn(host) + + # Essentially static class variables + + # The version of the HTTP protocol we support. + # Set this to HTTP/1.1 to enable automatic keepalive + protocol_version = "HTTP/1.0" + + # The Message-like class used to parse headers + MessageClass = mimetools.Message + + # Table mapping response codes to messages; entries have the + # form {code: (shortmessage, longmessage)}. + # See RFC 2616. 
+ responses = { + 100: ('Continue', 'Request received, please continue'), + 101: ('Switching Protocols', + 'Switching to new protocol; obey Upgrade header'), + + 200: ('OK', 'Request fulfilled, document follows'), + 201: ('Created', 'Document created, URL follows'), + 202: ('Accepted', + 'Request accepted, processing continues off-line'), + 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), + 204: ('No Content', 'Request fulfilled, nothing follows'), + 205: ('Reset Content', 'Clear input form for further input.'), + 206: ('Partial Content', 'Partial content follows.'), + + 300: ('Multiple Choices', + 'Object has several resources -- see URI list'), + 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), + 302: ('Found', 'Object moved temporarily -- see URI list'), + 303: ('See Other', 'Object moved -- see Method and URL list'), + 304: ('Not Modified', + 'Document has not changed since given time'), + 305: ('Use Proxy', + 'You must use proxy specified in Location to access this ' + 'resource.'), + 307: ('Temporary Redirect', + 'Object moved temporarily -- see URI list'), + + 400: ('Bad Request', + 'Bad request syntax or unsupported method'), + 401: ('Unauthorized', + 'No permission -- see authorization schemes'), + 402: ('Payment Required', + 'No payment -- see charging schemes'), + 403: ('Forbidden', + 'Request forbidden -- authorization will not help'), + 404: ('Not Found', 'Nothing matches the given URI'), + 405: ('Method Not Allowed', + 'Specified method is invalid for this server.'), + 406: ('Not Acceptable', 'URI not available in preferred format.'), + 407: ('Proxy Authentication Required', 'You must authenticate with ' + 'this proxy before proceeding.'), + 408: ('Request Timeout', 'Request timed out; try again later.'), + 409: ('Conflict', 'Request conflict.'), + 410: ('Gone', + 'URI no longer exists and has been permanently removed.'), + 411: ('Length Required', 'Client must specify Content-Length.'), + 412: 
('Precondition Failed', 'Precondition in headers is false.'), + 413: ('Request Entity Too Large', 'Entity is too large.'), + 414: ('Request-URI Too Long', 'URI is too long.'), + 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), + 416: ('Requested Range Not Satisfiable', + 'Cannot satisfy request range.'), + 417: ('Expectation Failed', + 'Expect condition could not be satisfied.'), + + 500: ('Internal Server Error', 'Server got itself in trouble'), + 501: ('Not Implemented', + 'Server does not support this operation'), + 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), + 503: ('Service Unavailable', + 'The server cannot process the request due to a high load'), + 504: ('Gateway Timeout', + 'The gateway server did not receive a timely response'), + 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), + } + + +def test(HandlerClass = BaseHTTPRequestHandler, + ServerClass = HTTPServer, protocol="HTTP/1.0"): + """Test the HTTP request handler class. + + This runs an HTTP server on port 8000 (or the first command line + argument). + + """ + + if sys.argv[1:]: + port = int(sys.argv[1]) + else: + port = 8000 + server_address = ('', port) + + HandlerClass.protocol_version = protocol + httpd = ServerClass(server_address, HandlerClass) + + sa = httpd.socket.getsockname() + print "Serving HTTP on", sa[0], "port", sa[1], "..." 
+ httpd.serve_forever() + + +if __name__ == '__main__': + test() diff --git a/language-python-test/test/CPython_test_suite_v2/BaseHTTPServer.test b/language-python-test/test/CPython_test_suite_v2/BaseHTTPServer.test new file mode 100644 index 0000000..823b5e9 --- /dev/null +++ b/language-python-test/test/CPython_test_suite_v2/BaseHTTPServer.test @@ -0,0 +1,5 @@ +language-python-roundtrip 2 BaseHTTPServer.py +<<< +>>> +>>>2 +>>>=0 diff --git a/language-python-test/test/CPython_test_suite_v2/Bastion.py b/language-python-test/test/CPython_test_suite_v2/Bastion.py new file mode 100644 index 0000000..d0dddbf --- /dev/null +++ b/language-python-test/test/CPython_test_suite_v2/Bastion.py @@ -0,0 +1,180 @@ +"""Bastionification utility. + +A bastion (for another object -- the 'original') is an object that has +the same methods as the original but does not give access to its +instance variables. Bastions have a number of uses, but the most +obvious one is to provide code executing in restricted mode with a +safe interface to an object implemented in unrestricted mode. + +The bastionification routine has an optional second argument which is +a filter function. Only those methods for which the filter method +(called with the method name as argument) returns true are accessible. +The default filter method returns true unless the method name begins +with an underscore. + +There are a number of possible implementations of bastions. We use a +'lazy' approach where the bastion's __getattr__() discipline does all +the work for a particular method the first time it is used. This is +usually fastest, especially if the user doesn't call all available +methods. The retrieved methods are stored as instance variables of +the bastion, so the overhead is only occurred on the first use of each +method. + +Detail: the bastion class has a __repr__() discipline which includes +the repr() of the original object. This is precomputed when the +bastion is created. 
+ +""" +from warnings import warnpy3k +warnpy3k("the Bastion module has been removed in Python 3.0", stacklevel=2) +del warnpy3k + +__all__ = ["BastionClass", "Bastion"] + +from types import MethodType + + +class BastionClass: + + """Helper class used by the Bastion() function. + + You could subclass this and pass the subclass as the bastionclass + argument to the Bastion() function, as long as the constructor has + the same signature (a get() function and a name for the object). + + """ + + def __init__(self, get, name): + """Constructor. + + Arguments: + + get - a function that gets the attribute value (by name) + name - a human-readable name for the original object + (suggestion: use repr(object)) + + """ + self._get_ = get + self._name_ = name + + def __repr__(self): + """Return a representation string. + + This includes the name passed in to the constructor, so that + if you print the bastion during debugging, at least you have + some idea of what it is. + + """ + return "" % self._name_ + + def __getattr__(self, name): + """Get an as-yet undefined attribute value. + + This calls the get() function that was passed to the + constructor. The result is stored as an instance variable so + that the next time the same attribute is requested, + __getattr__() won't be invoked. + + If the get() function raises an exception, this is simply + passed on -- exceptions are not cached. + + """ + attribute = self._get_(name) + self.__dict__[name] = attribute + return attribute + + +def Bastion(object, filter = lambda name: name[:1] != '_', + name=None, bastionclass=BastionClass): + """Create a bastion for an object, using an optional filter. + + See the Bastion module's documentation for background. 
+ + Arguments: + + object - the original object + filter - a predicate that decides whether a function name is OK; + by default all names are OK that don't start with '_' + name - the name of the object; default repr(object) + bastionclass - class used to create the bastion; default BastionClass + + """ + + raise RuntimeError, "This code is not secure in Python 2.2 and later" + + # Note: we define *two* ad-hoc functions here, get1 and get2. + # Both are intended to be called in the same way: get(name). + # It is clear that the real work (getting the attribute + # from the object and calling the filter) is done in get1. + # Why can't we pass get1 to the bastion? Because the user + # would be able to override the filter argument! With get2, + # overriding the default argument is no security loophole: + # all it does is call it. + # Also notice that we can't place the object and filter as + # instance variables on the bastion object itself, since + # the user has full access to all instance variables! + + def get1(name, object=object, filter=filter): + """Internal function for Bastion(). See source comments.""" + if filter(name): + attribute = getattr(object, name) + if type(attribute) == MethodType: + return attribute + raise AttributeError, name + + def get2(name, get1=get1): + """Internal function for Bastion(). 
See source comments.""" + return get1(name) + + if name is None: + name = repr(object) + return bastionclass(get2, name) + + +def _test(): + """Test the Bastion() function.""" + class Original: + def __init__(self): + self.sum = 0 + def add(self, n): + self._add(n) + def _add(self, n): + self.sum = self.sum + n + def total(self): + return self.sum + o = Original() + b = Bastion(o) + testcode = """if 1: + b.add(81) + b.add(18) + print "b.total() =", b.total() + try: + print "b.sum =", b.sum, + except: + print "inaccessible" + else: + print "accessible" + try: + print "b._add =", b._add, + except: + print "inaccessible" + else: + print "accessible" + try: + print "b._get_.func_defaults =", map(type, b._get_.func_defaults), + except: + print "inaccessible" + else: + print "accessible" + \n""" + exec testcode + print '='*20, "Using rexec:", '='*20 + import rexec + r = rexec.RExec() + m = r.add_module('__main__') + m.b = b + r.r_exec(testcode) + + +if __name__ == '__main__': + _test() diff --git a/language-python-test/test/CPython_test_suite_v2/Bastion.test b/language-python-test/test/CPython_test_suite_v2/Bastion.test new file mode 100644 index 0000000..4474657 --- /dev/null +++ b/language-python-test/test/CPython_test_suite_v2/Bastion.test @@ -0,0 +1,5 @@ +language-python-roundtrip 2 Bastion.py +<<< +>>> +>>>2 +>>>=0 diff --git a/language-python-test/test/CPython_test_suite_v2/CGIHTTPServer.py b/language-python-test/test/CPython_test_suite_v2/CGIHTTPServer.py new file mode 100644 index 0000000..71f0368 --- /dev/null +++ b/language-python-test/test/CPython_test_suite_v2/CGIHTTPServer.py @@ -0,0 +1,366 @@ +"""CGI-savvy HTTP Server. + +This module builds on SimpleHTTPServer by implementing GET and POST +requests to cgi-bin scripts. + +If the os.fork() function is not present (e.g. on Windows), +os.popen2() is used as a fallback, with slightly altered semantics; if +that function is not present either (e.g. 
on Macintosh), only Python +scripts are supported, and they are executed by the current process. + +In all cases, the implementation is intentionally naive -- all +requests are executed sychronously. + +SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL +-- it may execute arbitrary Python code or external programs. + +Note that status code 200 is sent prior to execution of a CGI script, so +scripts cannot send other status codes such as 302 (redirect). +""" + + +__version__ = "0.4" + +__all__ = ["CGIHTTPRequestHandler"] + +import os +import sys +import urllib +import BaseHTTPServer +import SimpleHTTPServer +import select + + +class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): + + """Complete HTTP server with GET, HEAD and POST commands. + + GET and HEAD also support running CGI scripts. + + The POST command is *only* implemented for CGI scripts. + + """ + + # Determine platform specifics + have_fork = hasattr(os, 'fork') + have_popen2 = hasattr(os, 'popen2') + have_popen3 = hasattr(os, 'popen3') + + # Make rfile unbuffered -- we need to read one line and then pass + # the rest to a subprocess, so we can't use buffered input. + rbufsize = 0 + + def do_POST(self): + """Serve a POST request. + + This is only implemented for CGI scripts. + + """ + + if self.is_cgi(): + self.run_cgi() + else: + self.send_error(501, "Can only POST to CGI scripts") + + def send_head(self): + """Version of send_head that support CGI scripts""" + if self.is_cgi(): + return self.run_cgi() + else: + return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self) + + def is_cgi(self): + """Test whether self.path corresponds to a CGI script, + and return a boolean. + + This function sets self.cgi_info to a tuple (dir, rest) + when it returns True, where dir is the directory part before + the CGI script name. Note that rest begins with a + slash if it is not empty. 
+ + The default implementation tests whether the path + begins with one of the strings in the list + self.cgi_directories (and the next character is a '/' + or the end of the string). + """ + + path = self.path + + for x in self.cgi_directories: + i = len(x) + if path[:i] == x and (not path[i:] or path[i] == '/'): + self.cgi_info = path[:i], path[i+1:] + return True + return False + + cgi_directories = ['/cgi-bin', '/htbin'] + + def is_executable(self, path): + """Test whether argument path is an executable file.""" + return executable(path) + + def is_python(self, path): + """Test whether argument path is a Python script.""" + head, tail = os.path.splitext(path) + return tail.lower() in (".py", ".pyw") + + def run_cgi(self): + """Execute a CGI script.""" + path = self.path + dir, rest = self.cgi_info + + i = path.find('/', len(dir) + 1) + while i >= 0: + nextdir = path[:i] + nextrest = path[i+1:] + + scriptdir = self.translate_path(nextdir) + if os.path.isdir(scriptdir): + dir, rest = nextdir, nextrest + i = path.find('/', len(dir) + 1) + else: + break + + # find an explicit query string, if present. + i = rest.rfind('?') + if i >= 0: + rest, query = rest[:i], rest[i+1:] + else: + query = '' + + # dissect the part after the directory name into a script name & + # a possible additional path, to be stored in PATH_INFO. 
+ i = rest.find('/') + if i >= 0: + script, rest = rest[:i], rest[i:] + else: + script, rest = rest, '' + + scriptname = dir + '/' + script + scriptfile = self.translate_path(scriptname) + if not os.path.exists(scriptfile): + self.send_error(404, "No such CGI script (%r)" % scriptname) + return + if not os.path.isfile(scriptfile): + self.send_error(403, "CGI script is not a plain file (%r)" % + scriptname) + return + ispy = self.is_python(scriptname) + if not ispy: + if not (self.have_fork or self.have_popen2 or self.have_popen3): + self.send_error(403, "CGI script is not a Python script (%r)" % + scriptname) + return + if not self.is_executable(scriptfile): + self.send_error(403, "CGI script is not executable (%r)" % + scriptname) + return + + # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html + # XXX Much of the following could be prepared ahead of time! + env = {} + env['SERVER_SOFTWARE'] = self.version_string() + env['SERVER_NAME'] = self.server.server_name + env['GATEWAY_INTERFACE'] = 'CGI/1.1' + env['SERVER_PROTOCOL'] = self.protocol_version + env['SERVER_PORT'] = str(self.server.server_port) + env['REQUEST_METHOD'] = self.command + uqrest = urllib.unquote(rest) + env['PATH_INFO'] = uqrest + env['PATH_TRANSLATED'] = self.translate_path(uqrest) + env['SCRIPT_NAME'] = scriptname + if query: + env['QUERY_STRING'] = query + host = self.address_string() + if host != self.client_address[0]: + env['REMOTE_HOST'] = host + env['REMOTE_ADDR'] = self.client_address[0] + authorization = self.headers.getheader("authorization") + if authorization: + authorization = authorization.split() + if len(authorization) == 2: + import base64, binascii + env['AUTH_TYPE'] = authorization[0] + if authorization[0].lower() == "basic": + try: + authorization = base64.decodestring(authorization[1]) + except binascii.Error: + pass + else: + authorization = authorization.split(':') + if len(authorization) == 2: + env['REMOTE_USER'] = authorization[0] + # XXX REMOTE_IDENT + if 
self.headers.typeheader is None: + env['CONTENT_TYPE'] = self.headers.type + else: + env['CONTENT_TYPE'] = self.headers.typeheader + length = self.headers.getheader('content-length') + if length: + env['CONTENT_LENGTH'] = length + referer = self.headers.getheader('referer') + if referer: + env['HTTP_REFERER'] = referer + accept = [] + for line in self.headers.getallmatchingheaders('accept'): + if line[:1] in "\t\n\r ": + accept.append(line.strip()) + else: + accept = accept + line[7:].split(',') + env['HTTP_ACCEPT'] = ','.join(accept) + ua = self.headers.getheader('user-agent') + if ua: + env['HTTP_USER_AGENT'] = ua + co = filter(None, self.headers.getheaders('cookie')) + if co: + env['HTTP_COOKIE'] = ', '.join(co) + # XXX Other HTTP_* headers + # Since we're setting the env in the parent, provide empty + # values to override previously set values + for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', + 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): + env.setdefault(k, "") + os.environ.update(env) + + self.send_response(200, "Script output follows") + + decoded_query = query.replace('+', ' ') + + if self.have_fork: + # Unix -- fork as we should + args = [script] + if '=' not in decoded_query: + args.append(decoded_query) + nobody = nobody_uid() + self.wfile.flush() # Always flush before forking + pid = os.fork() + if pid != 0: + # Parent + pid, sts = os.waitpid(pid, 0) + # throw away additional data [see bug #427345] + while select.select([self.rfile], [], [], 0)[0]: + if not self.rfile.read(1): + break + if sts: + self.log_error("CGI script exit status %#x", sts) + return + # Child + try: + try: + os.setuid(nobody) + except os.error: + pass + os.dup2(self.rfile.fileno(), 0) + os.dup2(self.wfile.fileno(), 1) + os.execve(scriptfile, args, os.environ) + except: + self.server.handle_error(self.request, self.client_address) + os._exit(127) + + elif self.have_popen2 or self.have_popen3: + # Windows -- use popen2 or popen3 to create a subprocess + import shutil 
+ if self.have_popen3: + popenx = os.popen3 + else: + popenx = os.popen2 + cmdline = scriptfile + if self.is_python(scriptfile): + interp = sys.executable + if interp.lower().endswith("w.exe"): + # On Windows, use python.exe, not pythonw.exe + interp = interp[:-5] + interp[-4:] + cmdline = "%s -u %s" % (interp, cmdline) + if '=' not in query and '"' not in query: + cmdline = '%s "%s"' % (cmdline, query) + self.log_message("command: %s", cmdline) + try: + nbytes = int(length) + except (TypeError, ValueError): + nbytes = 0 + files = popenx(cmdline, 'b') + fi = files[0] + fo = files[1] + if self.have_popen3: + fe = files[2] + if self.command.lower() == "post" and nbytes > 0: + data = self.rfile.read(nbytes) + fi.write(data) + # throw away additional data [see bug #427345] + while select.select([self.rfile._sock], [], [], 0)[0]: + if not self.rfile._sock.recv(1): + break + fi.close() + shutil.copyfileobj(fo, self.wfile) + if self.have_popen3: + errors = fe.read() + fe.close() + if errors: + self.log_error('%s', errors) + sts = fo.close() + if sts: + self.log_error("CGI script exit status %#x", sts) + else: + self.log_message("CGI script exited OK") + + else: + # Other O.S. 
-- execute script in this process + save_argv = sys.argv + save_stdin = sys.stdin + save_stdout = sys.stdout + save_stderr = sys.stderr + try: + save_cwd = os.getcwd() + try: + sys.argv = [scriptfile] + if '=' not in decoded_query: + sys.argv.append(decoded_query) + sys.stdout = self.wfile + sys.stdin = self.rfile + execfile(scriptfile, {"__name__": "__main__"}) + finally: + sys.argv = save_argv + sys.stdin = save_stdin + sys.stdout = save_stdout + sys.stderr = save_stderr + os.chdir(save_cwd) + except SystemExit, sts: + self.log_error("CGI script exit status %s", str(sts)) + else: + self.log_message("CGI script exited OK") + + +nobody = None + +def nobody_uid(): + """Internal routine to get nobody's uid""" + global nobody + if nobody: + return nobody + try: + import pwd + except ImportError: + return -1 + try: + nobody = pwd.getpwnam('nobody')[2] + except KeyError: + nobody = 1 + max(map(lambda x: x[2], pwd.getpwall())) + return nobody + + +def executable(path): + """Test for executable file.""" + try: + st = os.stat(path) + except os.error: + return False + return st.st_mode & 0111 != 0 + + +def test(HandlerClass = CGIHTTPRequestHandler, + ServerClass = BaseHTTPServer.HTTPServer): + SimpleHTTPServer.test(HandlerClass, ServerClass) + + +if __name__ == '__main__': + test() diff --git a/language-python-test/test/CPython_test_suite_v2/CGIHTTPServer.test b/language-python-test/test/CPython_test_suite_v2/CGIHTTPServer.test new file mode 100644 index 0000000..86e4fde --- /dev/null +++ b/language-python-test/test/CPython_test_suite_v2/CGIHTTPServer.test @@ -0,0 +1,5 @@ +language-python-roundtrip 2 CGIHTTPServer.py +<<< +>>> +>>>2 +>>>=0 diff --git a/language-python-test/test/CPython_test_suite_v2/ConfigParser.py b/language-python-test/test/CPython_test_suite_v2/ConfigParser.py new file mode 100644 index 0000000..b6af6f9 --- /dev/null +++ b/language-python-test/test/CPython_test_suite_v2/ConfigParser.py @@ -0,0 +1,669 @@ +"""Configuration file parser. 
+ +A setup file consists of sections, lead by a "[section]" header, +and followed by "name: value" entries, with continuations and such in +the style of RFC 822. + +The option values can contain format strings which refer to other values in +the same section, or values in a special [DEFAULT] section. + +For example: + + something: %(dir)s/whatever + +would resolve the "%(dir)s" to the value of dir. All reference +expansions are done late, on demand. + +Intrinsic defaults can be specified by passing them into the +ConfigParser constructor as a dictionary. + +class: + +ConfigParser -- responsible for parsing a list of + configuration files, and managing the parsed database. + + methods: + + __init__(defaults=None) + create the parser and specify a dictionary of intrinsic defaults. The + keys must be strings, the values must be appropriate for %()s string + interpolation. Note that `__name__' is always an intrinsic default; + its value is the section's name. + + sections() + return all the configuration section names, sans DEFAULT + + has_section(section) + return whether the given section exists + + has_option(section, option) + return whether the given option exists in the given section + + options(section) + return list of configuration options for the named section + + read(filenames) + read and parse the list of named configuration files, given by + name. A single filename is also allowed. Non-existing files + are ignored. Return list of successfully read files. + + readfp(fp, filename=None) + read and parse one configuration file, given as a file object. + The filename defaults to fp.name; it is only used in error + messages (if fp has no `name' attribute, the string `' is used). + + get(section, option, raw=False, vars=None) + return a string value for the named option. All % interpolations are + expanded in the return values, based on the defaults passed into the + constructor and the DEFAULT section. 
Additional substitutions may be + provided using the `vars' argument, which must be a dictionary whose + contents override any pre-existing defaults. + + getint(section, options) + like get(), but convert value to an integer + + getfloat(section, options) + like get(), but convert value to a float + + getboolean(section, options) + like get(), but convert value to a boolean (currently case + insensitively defined as 0, false, no, off for False, and 1, true, + yes, on for True). Returns False or True. + + items(section, raw=False, vars=None) + return a list of tuples with (name, value) for each option + in the section. + + remove_section(section) + remove the given file section and all its options + + remove_option(section, option) + remove the given option from the given section + + set(section, option, value) + set the given option + + write(fp) + write the configuration state in .ini format +""" + +import re + +__all__ = ["NoSectionError", "DuplicateSectionError", "NoOptionError", + "InterpolationError", "InterpolationDepthError", + "InterpolationSyntaxError", "ParsingError", + "MissingSectionHeaderError", + "ConfigParser", "SafeConfigParser", "RawConfigParser", + "DEFAULTSECT", "MAX_INTERPOLATION_DEPTH"] + +DEFAULTSECT = "DEFAULT" + +MAX_INTERPOLATION_DEPTH = 10 + + + +# exception classes +class Error(Exception): + """Base class for ConfigParser exceptions.""" + + def _get_message(self): + """Getter for 'message'; needed only to override deprecation in + BaseException.""" + return self.__message + + def _set_message(self, value): + """Setter for 'message'; needed only to override deprecation in + BaseException.""" + self.__message = value + + # BaseException.message has been deprecated since Python 2.6. To prevent + # DeprecationWarning from popping up over this pre-existing attribute, use + # a new property that takes lookup precedence. 
+ message = property(_get_message, _set_message) + + def __init__(self, msg=''): + self.message = msg + Exception.__init__(self, msg) + + def __repr__(self): + return self.message + + __str__ = __repr__ + +class NoSectionError(Error): + """Raised when no section matches a requested option.""" + + def __init__(self, section): + Error.__init__(self, 'No section: %r' % (section,)) + self.section = section + +class DuplicateSectionError(Error): + """Raised when a section is multiply-created.""" + + def __init__(self, section): + Error.__init__(self, "Section %r already exists" % section) + self.section = section + +class NoOptionError(Error): + """A requested option was not found.""" + + def __init__(self, option, section): + Error.__init__(self, "No option %r in section: %r" % + (option, section)) + self.option = option + self.section = section + +class InterpolationError(Error): + """Base class for interpolation-related exceptions.""" + + def __init__(self, option, section, msg): + Error.__init__(self, msg) + self.option = option + self.section = section + +class InterpolationMissingOptionError(InterpolationError): + """A string substitution required a setting which was not available.""" + + def __init__(self, option, section, rawval, reference): + msg = ("Bad value substitution:\n" + "\tsection: [%s]\n" + "\toption : %s\n" + "\tkey : %s\n" + "\trawval : %s\n" + % (section, option, reference, rawval)) + InterpolationError.__init__(self, option, section, msg) + self.reference = reference + +class InterpolationSyntaxError(InterpolationError): + """Raised when the source text into which substitutions are made + does not conform to the required syntax.""" + +class InterpolationDepthError(InterpolationError): + """Raised when substitutions are nested too deeply.""" + + def __init__(self, option, section, rawval): + msg = ("Value interpolation too deeply recursive:\n" + "\tsection: [%s]\n" + "\toption : %s\n" + "\trawval : %s\n" + % (section, option, rawval)) + 
InterpolationError.__init__(self, option, section, msg) + +class ParsingError(Error): + """Raised when a configuration file does not follow legal syntax.""" + + def __init__(self, filename): + Error.__init__(self, 'File contains parsing errors: %s' % filename) + self.filename = filename + self.errors = [] + + def append(self, lineno, line): + self.errors.append((lineno, line)) + self.message += '\n\t[line %2d]: %s' % (lineno, line) + +class MissingSectionHeaderError(ParsingError): + """Raised when a key-value pair is found before any section header.""" + + def __init__(self, filename, lineno, line): + Error.__init__( + self, + 'File contains no section headers.\nfile: %s, line: %d\n%r' % + (filename, lineno, line)) + self.filename = filename + self.lineno = lineno + self.line = line + + +class RawConfigParser: + def __init__(self, defaults=None, dict_type=dict): + self._dict = dict_type + self._sections = self._dict() + self._defaults = self._dict() + if defaults: + for key, value in defaults.items(): + self._defaults[self.optionxform(key)] = value + + def defaults(self): + return self._defaults + + def sections(self): + """Return a list of section names, excluding [DEFAULT]""" + # self._sections will never have [DEFAULT] in it + return self._sections.keys() + + def add_section(self, section): + """Create a new section in the configuration. + + Raise DuplicateSectionError if a section by the specified name + already exists. Raise ValueError if name is DEFAULT or any of it's + case-insensitive variants. + """ + if section.lower() == "default": + raise ValueError, 'Invalid section name: %s' % section + + if section in self._sections: + raise DuplicateSectionError(section) + self._sections[section] = self._dict() + + def has_section(self, section): + """Indicate whether the named section is present in the configuration. + + The DEFAULT section is not acknowledged. 
+ """ + return section in self._sections + + def options(self, section): + """Return a list of option names for the given section name.""" + try: + opts = self._sections[section].copy() + except KeyError: + raise NoSectionError(section) + opts.update(self._defaults) + if '__name__' in opts: + del opts['__name__'] + return opts.keys() + + def read(self, filenames): + """Read and parse a filename or a list of filenames. + + Files that cannot be opened are silently ignored; this is + designed so that you can specify a list of potential + configuration file locations (e.g. current directory, user's + home directory, systemwide directory), and all existing + configuration files in the list will be read. A single + filename may also be given. + + Return list of successfully read files. + """ + if isinstance(filenames, basestring): + filenames = [filenames] + read_ok = [] + for filename in filenames: + try: + fp = open(filename) + except IOError: + continue + self._read(fp, filename) + fp.close() + read_ok.append(filename) + return read_ok + + def readfp(self, fp, filename=None): + """Like read() but the argument must be a file-like object. + + The `fp' argument must have a `readline' method. Optional + second argument is the `filename', which if not given, is + taken from fp.name. If fp has no `name' attribute, `' is + used. 
+ + """ + if filename is None: + try: + filename = fp.name + except AttributeError: + filename = '' + self._read(fp, filename) + + def get(self, section, option): + opt = self.optionxform(option) + if section not in self._sections: + if section != DEFAULTSECT: + raise NoSectionError(section) + if opt in self._defaults: + return self._defaults[opt] + else: + raise NoOptionError(option, section) + elif opt in self._sections[section]: + return self._sections[section][opt] + elif opt in self._defaults: + return self._defaults[opt] + else: + raise NoOptionError(option, section) + + def items(self, section): + try: + d2 = self._sections[section] + except KeyError: + if section != DEFAULTSECT: + raise NoSectionError(section) + d2 = self._dict() + d = self._defaults.copy() + d.update(d2) + if "__name__" in d: + del d["__name__"] + return d.items() + + def _get(self, section, conv, option): + return conv(self.get(section, option)) + + def getint(self, section, option): + return self._get(section, int, option) + + def getfloat(self, section, option): + return self._get(section, float, option) + + _boolean_states = {'1': True, 'yes': True, 'true': True, 'on': True, + '0': False, 'no': False, 'false': False, 'off': False} + + def getboolean(self, section, option): + v = self.get(section, option) + if v.lower() not in self._boolean_states: + raise ValueError, 'Not a boolean: %s' % v + return self._boolean_states[v.lower()] + + def optionxform(self, optionstr): + return optionstr.lower() + + def has_option(self, section, option): + """Check for the existence of a given option in a given section.""" + if not section or section == DEFAULTSECT: + option = self.optionxform(option) + return option in self._defaults + elif section not in self._sections: + return False + else: + option = self.optionxform(option) + return (option in self._sections[section] + or option in self._defaults) + + def set(self, section, option, value): + """Set an option.""" + if not section or section == 
DEFAULTSECT: + sectdict = self._defaults + else: + try: + sectdict = self._sections[section] + except KeyError: + raise NoSectionError(section) + sectdict[self.optionxform(option)] = value + + def write(self, fp): + """Write an .ini-format representation of the configuration state.""" + if self._defaults: + fp.write("[%s]\n" % DEFAULTSECT) + for (key, value) in self._defaults.items(): + fp.write("%s = %s\n" % (key, str(value).replace('\n', '\n\t'))) + fp.write("\n") + for section in self._sections: + fp.write("[%s]\n" % section) + for (key, value) in self._sections[section].items(): + if key != "__name__": + fp.write("%s = %s\n" % + (key, str(value).replace('\n', '\n\t'))) + fp.write("\n") + + def remove_option(self, section, option): + """Remove an option.""" + if not section or section == DEFAULTSECT: + sectdict = self._defaults + else: + try: + sectdict = self._sections[section] + except KeyError: + raise NoSectionError(section) + option = self.optionxform(option) + existed = option in sectdict + if existed: + del sectdict[option] + return existed + + def remove_section(self, section): + """Remove a file section.""" + existed = section in self._sections + if existed: + del self._sections[section] + return existed + + # + # Regular expressions for parsing section headers and options. + # + SECTCRE = re.compile( + r'\[' # [ + r'(?P

[^]]+)' # very permissive! + r'\]' # ] + ) + OPTCRE = re.compile( + r'(?P